popen
and evaluate the expression through it. The goal of this article is to develop a fairly complex system step by step using TDD, so only the standard C++ library and the test framework built into the IDE will be used. LexerTests
. I will use a technique known as a test list: it records the tests I plan to write next. It also holds ideas for upcoming tests, which often arise while writing the current test and cannot be implemented right away: Should
, where the subject is whatever is named in the test class. That is: Lexer ... should ... do A in response to B. This focuses each test on one small aspect of behavior and keeps it from growing. TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(""); Assert::IsTrue(tokens.empty()); } };
TEST_METHOD
, respectively, creates the test method itself. Note that an instance of the class is created only once, before all of its tests are run. In Boost.Test, by contrast, a new instance is created before each test. Therefore, code that must run before each test goes in a method declared with the macro TEST_METHOD_INITIALIZE
, and code that must run after each test goes in TEST_METHOD_CLEANUP
. All assertion methods are static and are located in the Assert
class. There are only a few of them, but they cover the main functionality. Tokenize
function in the Lexer
namespace, taking a string and returning std::vector<Token>, Tokens
. .
#pragma once
#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

/// Sequence of tokens produced by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Splits an expression into tokens.
/// Not implemented at this stage: every call throws std::exception.
/// @param expr expression text (currently ignored).
/// @throws std::exception always.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
/// Minimal implementation: whatever the input, the returned token list is empty.
/// @param expr expression text (ignored).
inline Tokens Tokenize(std::string expr) {
    Tokens none;
    return none;
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone "+" must produce exactly one token: the Plus operator.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
/// Operators known to the lexer; the enumerator's value is the operator's own character.
enum class Operator : wchar_t {
    Plus = L'+'
};

/// At this stage a token is simply an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
/// Renders a token as a one-character string holding the token's character value.
inline std::wstring ToString(const Token &token) {
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
/// Empty input gives no tokens; otherwise the first character is taken as an operator.
/// @param expr expression text; only its first character is examined.
inline Tokens Tokenize(std::wstring expr) {
    if(!expr.empty()) {
        return{ static_cast<Operator>(expr[0]) };
    }
    return{};
}
. . . β¦
.
// A single digit must produce one number token with that value.
TEST_METHOD(Should_tokenize_single_digit) {
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double reports TokenType::Number.
TEST_METHOD(Should_get_type_for_number_token) {
    const Token number(1.2);
    Assert::AreEqual(TokenType::Number, number.Type());
}
. (constant → scalar) .
/// Token that remembers which kind it was constructed as; the value itself is not stored.
class Token {
    TokenType m_type;

public:
    /// Builds an operator token (the operator is ignored).
    Token(Operator) : m_type(TokenType::Operator) {}

    /// Builds a number token (the number is ignored).
    Token(double) : m_type(TokenType::Number) {}

    /// Kind selected by the constructor that built this token.
    TokenType Type() const {
        return m_type;
    }
};
β¦ . . . .
.
// An operator token converts back to the operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    const Token plus(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plus);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token converts back to the double it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) {
    const Token number(1.23);
    Assert::AreEqual<double>(1.23, number);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
// A number with a fractional part must come back as one number token.
TEST_METHOD(Should_tokenize_floating_point_number) {
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
/// Produces at most one token from the start of the expression:
/// nothing for empty input, a number if it starts with a digit,
/// otherwise the first character as an operator.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *cursor = expr.c_str();
    if(*cursor == L'\0')
        return{};
    if(!iswdigit(*cursor))
        return{ static_cast<Operator>(*cursor) };
    return{ _wtof(cursor) };
}
. .
// An operator immediately followed by a number must give two tokens, in order.
TEST_METHOD(Should_tokenize_plus_and_number) {
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
/// Same single-token logic as before, but the token is collected into a
/// result list instead of being returned directly.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    const wchar_t *cursor = expr.c_str();
    if(*cursor != L'\0') {
        if(iswdigit(*cursor))
            tokens.push_back(_wtof(cursor));
        else
            tokens.push_back(static_cast<Operator>(*cursor));
    }
    return tokens;
}
: (if → while). , .
/// Walks the whole expression: a digit starts a number, which is parsed with
/// wcstod; any other character becomes an operator token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    for(const wchar_t *cursor = expr.c_str(); *cursor != L'\0'; ) {
        if(!iswdigit(*cursor)) {
            tokens.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        // wcstod reports where the number ended; scanning resumes from there.
        wchar_t *afterNumber = nullptr;
        tokens.push_back(wcstod(cursor, &afterNumber));
        cursor = afterNumber;
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
// Spaces around and between tokens must not produce tokens of their own.
TEST_METHOD(Should_skip_spaces) {
    const Tokens actual = Lexer::Tokenize(L" 1 + 12.34 ");
    AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, actual);
}
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
/// Tokenizes `expr` by running a Detail::Tokenizer over it and returning a
/// copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    const Tokens &collected = scanner.Result();
    return collected;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// Every operator and parenthesis in a mixed expression must come back as its
// own token, in source order, between the number tokens.
TEST_METHOD(Should_tokenize_complex_experssion) {
    const Tokens actual = Lexer::Tokenize(L"1+2*3/(4-5)");
    AssertRange::AreEqual({
        Token(1), Token(Operator::Plus),
        Token(2), Token(Operator::Mul),
        Token(3), Token(Operator::Div),
        Token(Operator::LParen),
        Token(4), Token(Operator::Minus), Token(5),
        Token(Operator::RParen)
    }, actual);
}
Operator
, .
/// Operators and parentheses known to the lexer.
/// Each enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')'
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <wchar.h>
#include <wctype.h>

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>

namespace Interpreter {

/// Operators and parentheses known to the lexer.
/// Each enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the operator as a one-character string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Discriminates what a Token holds.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator's name.
/// @throws std::out_of_range for a value that is not a declared enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

/// A token holding either an operator or a number.
/// The constructors and conversion operators are implicit, so a Token can be
/// built from, and used as, an Operator or a double.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    /// Kind of value this token holds.
    TokenType Type() const { return m_type; }

    /// @throws std::logic_error if this is not an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error if this is not a number token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same kind and the same value.
    /// Numbers are compared exactly, with ==.
    /// @throws std::out_of_range if the kind is not a declared enumerator.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is written by the constructors and
    // read afterwards.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Renders a token as text: numbers via std::to_wstring, operators via
/// ToString(Operator).
/// @throws std::out_of_range if the token's kind is not a declared enumerator.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

/// Scans an expression character by character and collects its tokens.
/// The tokenizer keeps a pointer into the string passed to the constructor,
/// so that string must stay alive until Tokenize() has finished.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the rest of the expression: digits start a number, operator
    /// characters become operator tokens, and anything else is skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    /// Parses a number at the current position and moves past it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // wcstod consumed nothing; without this step m_current would never
            // advance and Tokenize() would loop forever.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    /// True when the current character is one of the Operator enumerators.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits `expr` into number and operator tokens, skipping every other character.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector<Token>, Tokens
. .
#pragma once
#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

/// Sequence of tokens produced by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Splits an expression into tokens.
/// Not implemented at this stage: every call throws std::exception.
/// @param expr expression text (currently ignored).
/// @throws std::exception always.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <wchar.h>
#include <wctype.h>

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>

namespace Interpreter {

/// Operators and parentheses known to the lexer.
/// Each enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the operator as a one-character string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Discriminates what a Token holds.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator's name.
/// @throws std::out_of_range for a value that is not a declared enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

/// A token holding either an operator or a number.
/// The constructors and conversion operators are implicit, so a Token can be
/// built from, and used as, an Operator or a double.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    /// Kind of value this token holds.
    TokenType Type() const { return m_type; }

    /// @throws std::logic_error if this is not an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error if this is not a number token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same kind and the same value.
    /// Numbers are compared exactly, with ==.
    /// @throws std::out_of_range if the kind is not a declared enumerator.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is written by the constructors and
    // read afterwards.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Renders a token as text: numbers via std::to_wstring, operators via
/// ToString(Operator).
/// @throws std::out_of_range if the token's kind is not a declared enumerator.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

/// Scans an expression character by character and collects its tokens.
/// The tokenizer keeps a pointer into the string passed to the constructor,
/// so that string must stay alive until Tokenize() has finished.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the rest of the expression: digits start a number, operator
    /// characters become operator tokens, and anything else is skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    /// Parses a number at the current position and moves past it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // wcstod consumed nothing; without this step m_current would never
            // advance and Tokenize() would loop forever.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    /// True when the current character is one of the Operator enumerators.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits `expr` into number and operator tokens, skipping every other character.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector<Token>, Tokens
. .
#pragma once
#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

/// Sequence of tokens produced by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Splits an expression into tokens.
/// Not implemented at this stage: every call throws std::exception.
/// @param expr expression text (currently ignored).
/// @throws std::exception always.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token must convert back to the value it was built from.
TEST_METHOD(Should_get_number_value_from_number_token)
{
    const Token number(1.23);
    Assert::AreEqual<double>(1.23, number);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
// A multi-digit number with a fractional part must come back as one number token.
TEST_METHOD(Should_tokenize_floating_point_number)
{
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
// Produces at most one token: a number when the text starts with a digit,
// otherwise an operator built from the first character.
inline Tokens Tokenize(std::wstring expr)
{
    const wchar_t *current = expr.c_str();
    const wchar_t head = *current;

    if(head == L'\0')
    {
        return Tokens();
    }
    if(iswdigit(head))
    {
        return Tokens{ Token(_wtof(current)) };
    }
    return Tokens{ Token(static_cast<Operator>(head)) };
}
. .
// An operator directly followed by a number must yield two tokens in that order.
TEST_METHOD(Should_tokenize_plus_and_number)
{
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
// Same single-token behaviour as before, now accumulating into a result vector.
inline Tokens Tokenize(std::wstring expr)
{
    Tokens result;
    const wchar_t *current = expr.c_str();
    const wchar_t head = *current;

    if(head != L'\0')
    {
        const Token token = iswdigit(head)
            ? Token(_wtof(current))
            : Token(static_cast<Operator>(head));
        result.push_back(token);
    }
    return result;
}
: (if → while). , .
// Walks the whole expression: a run starting with a digit is parsed by wcstod into a
// number token, and every other character becomes an operator token.
inline Tokens Tokenize(std::wstring expr)
{
    Tokens result;
    for(const wchar_t *cursor = expr.c_str(); *cursor != L'\0'; )
    {
        if(!iswdigit(*cursor))
        {
            result.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }

        // wcstod reports where the number stopped, so scanning resumes right after it.
        wchar_t *numberEnd = nullptr;
        result.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;
    }
    return result;
}
wcstod
, _wtof
, . , . , .
. .
.
// Spaces around tokens must not produce tokens of their own.
TEST_METHOD(Should_skip_spaces)
{
    const Tokens actual = Lexer::Tokenize(L" 1 + 12.34 ");
    AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, actual);
}
(unconditional → if) , .
// Scan loop: numbers and '+' produce tokens; every other character is skipped.
while(*current)
{
    const wchar_t symbol = *current;
    if(iswdigit(symbol))
    {
        wchar_t *numberEnd = nullptr;
        result.push_back(wcstod(current, &numberEnd));
        current = numberEnd;
        continue;
    }

    if(symbol == static_cast<wchar_t>(Operator::Plus))
    {
        result.push_back(static_cast<Operator>(symbol));
    }
    // Reached for operators and for skipped characters alike.
    ++current;
}
. . Detail
. Tokenize
.
// Entry point of the lexer: runs a Detail::Tokenizer over `expr` and returns a copy
// of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr)
{
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    Tokens collected = scanner.Result();
    return collected;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// Every supported operator and the parentheses must be recognised inside one expression.
TEST_METHOD(Should_tokenize_complex_experssion)
{
    const Tokens actual = Lexer::Tokenize(L"1+2*3/(4-5)");
    AssertRange::AreEqual(
        {
            Token(1), Token(Operator::Plus),
            Token(2), Token(Operator::Mul),
            Token(3), Token(Operator::Div),
            Token(Operator::LParen),
            Token(4), Token(Operator::Minus), Token(5),
            Token(Operator::RParen)
        },
        actual);
}
Operator
, .
// Operators and parentheses known to the lexer.
// Each enumerator's value is the character that represents it in the source text.
enum class Operator : wchar_t
{
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once
#include <vector>
#include <string>
#include <stdexcept>
#include <wchar.h>
#include <wctype.h>
#include <algorithm>

namespace Interpreter
{
    // Operators and parentheses known to the lexer; each value is its source character.
    enum class Operator : wchar_t
    {
        Plus = L'+',
        Minus = L'-',
        Mul = L'*',
        Div = L'/',
        LParen = L'(',
        RParen = L')',
    };

    // One-character string holding the operator's character.
    inline std::wstring ToString(const Operator &op)
    {
        return{ static_cast<wchar_t>(op) };
    }

    // Kinds of token the lexer produces.
    enum class TokenType
    {
        Operator,
        Number
    };

    // Name of the token kind; throws std::out_of_range for a value outside the enum.
    inline std::wstring ToString(const TokenType &type)
    {
        switch(type)
        {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
        }
    }

    // Tagged value: holds either an operator or a number.
    // Constructors and conversions are deliberately implicit; the tests compare tokens
    // directly against Operator and double values.
    class Token
    {
    public:
        Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
        Token(double num) :m_type(TokenType::Number), m_number(num) {}

        TokenType Type() const { return m_type; }

        // Returns the stored operator; throws std::logic_error for a number token.
        operator Operator() const
        {
            if(m_type != TokenType::Operator)
                throw std::logic_error("Should be operator token.");
            return m_operator;
        }

        // Returns the stored number; throws std::logic_error for an operator token.
        operator double() const
        {
            if(m_type != TokenType::Number)
                throw std::logic_error("Should be number token.");
            return m_number;
        }

        // Tokens are equal when they have the same kind and the same payload.
        friend inline bool operator==(const Token &left, const Token &right)
        {
            if(left.m_type == right.m_type)
            {
                switch(left.m_type)
                {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
                }
            }
            return false;
        }

    private:
        TokenType m_type;
        // Only the member selected by m_type is meaningful.
        union
        {
            Operator m_operator;
            double m_number;
        };
    };

    // Text form of a token: the number via std::to_wstring, or the operator character.
    inline std::wstring ToString(const Token &token)
    {
        switch(token.Type())
        {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
        }
    }

    typedef std::vector<Token> Tokens;

    namespace Lexer
    {
        namespace Detail
        {
            // Scanner that turns an expression string into tokens.
            // It keeps a raw pointer into the string passed to the constructor, so that
            // string must stay alive and unmodified until Tokenize() has finished.
            class Tokenizer
            {
            public:
                explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

                // Consumes the expression up to its terminating L'\0'. Characters that
                // neither start a number nor match an operator are skipped.
                void Tokenize()
                {
                    while(!EndOfExpression())
                    {
                        if(IsNumber())
                        {
                            ScanNumber();
                        }
                        else if(IsOperator())
                        {
                            ScanOperator();
                        }
                        else
                        {
                            MoveNext();
                        }
                    }
                }

                const Tokens &Result() const { return m_result; }

            private:
                bool EndOfExpression() const { return *m_current == L'\0'; }

                bool IsNumber() const { return iswdigit(*m_current) != 0; }

                // Parses a number with wcstod and continues right after what it consumed.
                void ScanNumber()
                {
                    wchar_t *end = nullptr;
                    m_result.push_back(wcstod(m_current, &end));
                    m_current = end;
                }

                bool IsOperator() const
                {
                    auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
                    return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
                }

                void ScanOperator()
                {
                    m_result.push_back(static_cast<Operator>(*m_current));
                    MoveNext();
                }

                void MoveNext() { ++m_current; }

                const wchar_t *m_current; // next unread character of the expression
                Tokens m_result;
            };
        } // namespace Detail

        // Splits `expr` into number and operator tokens, skipping unrecognised characters.
        inline Tokens Tokenize(const std::wstring &expr)
        {
            Detail::Tokenizer tokenizer(expr);
            tokenizer.Tokenize();
            return tokenizer.Result();
        }
    } // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once
#include <exception>
#include <string>
#include <vector>

namespace Interpreter
{
    // Placeholder token type; it carries no data yet.
    struct Token {};
    typedef std::vector<Token> Tokens;

    namespace Lexer
    {
        // Stub that exists only so the first test compiles: it always throws std::exception.
        inline Tokens Tokenize(std::string expr)
        {
            throw std::exception();
        }
    } // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
// Simplest implementation that satisfies the first test: always an empty token list.
inline Tokens Tokenize(std::string expr)
{
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone '+' must produce a single operator token.
TEST_METHOD(Should_tokenize_single_plus_operator)
{
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once
#include <vector>
#include <string>
#include <stdexcept>
#include <wchar.h>
#include <wctype.h>
#include <algorithm>

namespace Interpreter
{
    // Operators and parentheses known to the lexer; each value is its source character.
    enum class Operator : wchar_t
    {
        Plus = L'+',
        Minus = L'-',
        Mul = L'*',
        Div = L'/',
        LParen = L'(',
        RParen = L')',
    };

    // One-character string holding the operator's character.
    inline std::wstring ToString(const Operator &op)
    {
        return{ static_cast<wchar_t>(op) };
    }

    // Kinds of token the lexer produces.
    enum class TokenType
    {
        Operator,
        Number
    };

    // Name of the token kind; throws std::out_of_range for a value outside the enum.
    inline std::wstring ToString(const TokenType &type)
    {
        switch(type)
        {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
        }
    }

    // Tagged value: holds either an operator or a number.
    // Constructors and conversions are deliberately implicit; the tests compare tokens
    // directly against Operator and double values.
    class Token
    {
    public:
        Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
        Token(double num) :m_type(TokenType::Number), m_number(num) {}

        TokenType Type() const { return m_type; }

        // Returns the stored operator; throws std::logic_error for a number token.
        operator Operator() const
        {
            if(m_type != TokenType::Operator)
                throw std::logic_error("Should be operator token.");
            return m_operator;
        }

        // Returns the stored number; throws std::logic_error for an operator token.
        operator double() const
        {
            if(m_type != TokenType::Number)
                throw std::logic_error("Should be number token.");
            return m_number;
        }

        // Tokens are equal when they have the same kind and the same payload.
        friend inline bool operator==(const Token &left, const Token &right)
        {
            if(left.m_type == right.m_type)
            {
                switch(left.m_type)
                {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
                }
            }
            return false;
        }

    private:
        TokenType m_type;
        // Only the member selected by m_type is meaningful.
        union
        {
            Operator m_operator;
            double m_number;
        };
    };

    // Text form of a token: the number via std::to_wstring, or the operator character.
    inline std::wstring ToString(const Token &token)
    {
        switch(token.Type())
        {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
        }
    }

    typedef std::vector<Token> Tokens;

    namespace Lexer
    {
        namespace Detail
        {
            // Scanner that turns an expression string into tokens.
            // It keeps a raw pointer into the string passed to the constructor, so that
            // string must stay alive and unmodified until Tokenize() has finished.
            class Tokenizer
            {
            public:
                explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

                // Consumes the expression up to its terminating L'\0'. Characters that
                // neither start a number nor match an operator are skipped.
                void Tokenize()
                {
                    while(!EndOfExpression())
                    {
                        if(IsNumber())
                        {
                            ScanNumber();
                        }
                        else if(IsOperator())
                        {
                            ScanOperator();
                        }
                        else
                        {
                            MoveNext();
                        }
                    }
                }

                const Tokens &Result() const { return m_result; }

            private:
                bool EndOfExpression() const { return *m_current == L'\0'; }

                bool IsNumber() const { return iswdigit(*m_current) != 0; }

                // Parses a number with wcstod and continues right after what it consumed.
                void ScanNumber()
                {
                    wchar_t *end = nullptr;
                    m_result.push_back(wcstod(m_current, &end));
                    m_current = end;
                }

                bool IsOperator() const
                {
                    auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
                    return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
                }

                void ScanOperator()
                {
                    m_result.push_back(static_cast<Operator>(*m_current));
                    MoveNext();
                }

                void MoveNext() { ++m_current; }

                const wchar_t *m_current; // next unread character of the expression
                Tokens m_result;
            };
        } // namespace Detail

        // Splits `expr` into number and operator tokens, skipping unrecognised characters.
        inline Tokens Tokenize(const std::wstring &expr)
        {
            Detail::Tokenizer tokenizer(expr);
            tokenizer.Tokenize();
            return tokenizer.Result();
        }
    } // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once
#include <exception>
#include <string>
#include <vector>

namespace Interpreter
{
    // Placeholder token type; it carries no data yet.
    struct Token {};
    typedef std::vector<Token> Tokens;

    namespace Lexer
    {
        // Stub that exists only so the first test compiles: it always throws std::exception.
        inline Tokens Tokenize(std::string expr)
        {
            throw std::exception();
        }
    } // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} β nil) , . (nil β constant) . (constant β constant+) ( , ). (constant β scalar) , . (statement β statements) (break, continue, return ). (unconditional β if) . (scalar β array) / . (array β container) . (statement β recursion) . (if β while) . (expression β function) . (variable β assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators known to the lexer. Every enumerator's value is the character
/// that spells it, so a character and its Operator convert with a plain cast.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character string that spells op.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Tells which alternative a Token holds.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator name of type.
/// @throws std::out_of_range if type is not a declared TokenType value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

/// A lexer token: either an Operator or a number, tagged with its TokenType.
class Token {
public:
    // Both constructors are implicit on purpose: the lexer pushes raw
    // Operator and double values straight into a Tokens vector.
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// @throws std::logic_error if this token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error if this token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they hold the same alternative with equal
    /// payloads; numbers are compared exactly.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type; // selects the active member of the union below
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Formats a token through std::to_wstring (numbers) or ToString(Operator).
/// @throws std::out_of_range if the token's type is not a declared TokenType.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

/// Single-pass scanner that turns an expression string into Tokens.
/// Numbers and the six operators produce tokens; every other character is
/// skipped.
class Tokenizer {
public:
    /// Starts scanning at the first character of expr.
    /// Only a pointer into expr's buffer is stored, so expr has to stay alive
    /// and unmodified until Tokenize() has returned.
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Scans to the terminating L'\0', appending tokens to the result.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            } else if(IsOperator()) {
                ScanOperator();
            } else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    /// Lets wcstod decide how many characters belong to the number.
    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // wcstod consumed nothing (could happen if iswdigit accepts a
            // character wcstod does not parse); step over it so the scan
            // always makes progress instead of looping forever.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current; // next unread character
    Tokens m_result;
};

} // namespace Detail

/// Splits expr into number and operator tokens, ignoring any other character.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer

} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

/// Not implemented yet: every call throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer

} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
/// Ignores expr and returns an empty token list.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator)
{
    // A lone "+" must come back as exactly one Operator::Plus token.
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
/// Operators known to the lexer; the enumerator's value is the character
/// that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
};

/// For now a token is simply an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
/// Renders a token as a one-character string holding its wchar_t code.
inline std::wstring ToString(const Token &token) {
    const wchar_t code = static_cast<wchar_t>(token);
    return std::wstring(1, code);
}
, , . , (unconditional → if).
/// Returns no tokens for an empty expression; otherwise returns a single
/// token built from the first character's code.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    if(!expr.empty()) {
        tokens.push_back(static_cast<Operator>(expr[0]));
    }
    return tokens;
}
. . . …
.
TEST_METHOD(Should_tokenize_single_digit)
{
    // A single digit must come back as one number token.
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token)
{
    // A token built from a double must report TokenType::Number.
    const Token number(1.2);
    const TokenType kind = number.Type();
    Assert::AreEqual(TokenType::Number, kind);
}
. (constant → scalar) .
/// Token that only remembers which kind of value it was built from; the
/// value itself is not stored.
class Token {
    TokenType m_type;

public:
    Token(Operator) : m_type(TokenType::Operator) {}

    Token(double) : m_type(TokenType::Number) {}

    TokenType Type() const {
        return m_type;
    }
};
… . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token)
{
    // Converting an operator token back to Operator must yield the original code.
    const Token plus(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plus);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token)
{
    // Converting a number token back to double must yield the original value.
    const Token number(1.23);
    Assert::AreEqual<double>(1.23, number);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
TEST_METHOD(Should_tokenize_floating_point_number)
{
    // Several digits with a decimal point must still form a single number token.
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
/// Produces at most one token from the start of expr: nothing for an empty
/// string, a number parsed by _wtof when the first character is a digit,
/// otherwise an operator holding the first character's code.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *const start = expr.c_str();
    const wchar_t head = *start;
    if(head == L'\0') {
        return Tokens();
    }
    if(!iswdigit(head)) {
        return Tokens(1, Token(static_cast<Operator>(head)));
    }
    return Tokens(1, Token(_wtof(start)));
}
. .
TEST_METHOD(Should_tokenize_plus_and_number)
{
    // An operator directly followed by a number must yield two tokens.
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
/// Collects at most one token from the start of expr into a vector: a number
/// parsed by _wtof when the first character is a digit, otherwise an operator
/// holding the first character's code. An empty string gives an empty vector.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    const wchar_t *const start = expr.c_str();
    if(*start != L'\0') {
        const Token first = iswdigit(*start)
            ? Token(_wtof(start))
            : Token(static_cast<Operator>(*start));
        tokens.push_back(first);
    }
    return tokens;
}
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
Interpreter.h #pragma once; #include <vector> #include <wchar.h> #include <algorithm> namespace Interpreter { enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', }; inline std::wstring ToString(const Operator &op) { return{ static_cast<wchar_t>(op) }; } enum class TokenType { Operator, Number }; inline std::wstring ToString(const TokenType &type) { switch(type) { case TokenType::Operator: return L"Operator"; case TokenType::Number: return L"Number"; default: throw std::out_of_range("TokenType"); } } class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } friend inline bool operator==(const Token &left, const Token &right) { if(left.m_type == right.m_type) { switch(left.m_type) { case Interpreter::TokenType::Operator: return left.m_operator == right.m_operator; case Interpreter::TokenType::Number: return left.m_number == right.m_number; default: throw std::out_of_range("TokenType"); } } return false; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: throw std::out_of_range("TokenType"); } } typedef std::vector<Token> Tokens; namespace Lexer { namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { 
ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); } } // namespace Lexer } // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once; #include <vector> namespace Interpreter { struct Token {}; typedef std::vector<Token> Tokens; namespace Lexer { inline Tokens Tokenize(std::string expr) { throw std::exception(); } } // namespace Lexer } // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} β nil) , . (nil β constant) . (constant β constant+) ( , ). (constant β scalar) , . (statement β statements) (break, continue, return ). (unconditional β if) . (scalar β array) / . (array β container) . (statement β recursion) . (if β while) . (expression β function) . (variable β assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
Interpreter.h #pragma once; #include <vector> #include <wchar.h> #include <algorithm> namespace Interpreter { enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', }; inline std::wstring ToString(const Operator &op) { return{ static_cast<wchar_t>(op) }; } enum class TokenType { Operator, Number }; inline std::wstring ToString(const TokenType &type) { switch(type) { case TokenType::Operator: return L"Operator"; case TokenType::Number: return L"Number"; default: throw std::out_of_range("TokenType"); } } class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } friend inline bool operator==(const Token &left, const Token &right) { if(left.m_type == right.m_type) { switch(left.m_type) { case Interpreter::TokenType::Operator: return left.m_operator == right.m_operator; case Interpreter::TokenType::Number: return left.m_number == right.m_number; default: throw std::out_of_range("TokenType"); } } return false; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: throw std::out_of_range("TokenType"); } } typedef std::vector<Token> Tokens; namespace Lexer { namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { 
ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); } } // namespace Lexer } // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

/// Sequence of tokens produced by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Splits an expression into tokens.
/// Not implemented at this step: every call throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
/// Tokenizes an expression. At this step the input is ignored and the result is always empty.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone '+' must be tokenized into exactly one Plus operator.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
/// Operators known to the lexer; an enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
};

/// At this step a token is simply an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
/// Renders a token as a one-character string holding the token's character code.
inline std::wstring ToString(const Token &token) {
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
/// Tokenizes an expression. An empty input yields no tokens; otherwise the first
/// character alone is converted to an operator token and the rest is ignored.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    if(!expr.empty()) {
        result.push_back(static_cast<Operator>(expr[0]));
    }
    return result;
}
. . . …
.
// A single digit must be tokenized into one number token.
TEST_METHOD(Should_tokenize_single_digit) {
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token constructed from a double must report the Number type.
TEST_METHOD(Should_get_type_for_number_token) {
    const Token token(1.2);
    Assert::AreEqual(TokenType::Number, token.Type());
}
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
… . . . .
.
// An operator token must convert back to the operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    const Token plusToken(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plusToken);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token must convert back to the value it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) {
    const Token numberToken(1.23);
    Assert::AreEqual<double>(1.23, numberToken);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
// A number with a fractional part must be tokenized as a single number token.
TEST_METHOD(Should_tokenize_floating_point_number) {
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
/// Tokenizes the start of an expression into at most one token:
/// nothing for empty input, a number if the text starts with a digit, otherwise an operator.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *cursor = expr.c_str();
    if(*cursor == L'\0') {
        return{};
    }
    if(iswdigit(*cursor)) {
        // _wtof parses the number that starts at the cursor.
        return{ _wtof(cursor) };
    }
    return{ static_cast<Operator>(*cursor) };
}
. .
// An operator immediately followed by a number must produce two tokens, in that order.
TEST_METHOD(Should_tokenize_plus_and_number) {
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
/// Tokenizes the start of an expression into at most one token, collected in a result list:
/// a number if the text starts with a digit, otherwise an operator; empty input gives no tokens.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    const wchar_t *first = expr.c_str();
    if(*first != L'\0') {
        if(iswdigit(*first))
            result.push_back(_wtof(first));
        else
            result.push_back(static_cast<Operator>(*first));
    }
    return result;
}
: (if → while). , .
/// Tokenizes a whole expression: each run that starts with a digit is parsed by wcstod
/// into a number token; every other character becomes an operator token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    for(const wchar_t *pos = expr.c_str(); *pos != L'\0'; ) {
        if(!iswdigit(*pos)) {
            tokens.push_back(static_cast<Operator>(*pos));
            ++pos;
            continue;
        }
        // wcstod reports where the number ended, so scanning resumes right after it.
        wchar_t *numberEnd = nullptr;
        tokens.push_back(wcstod(pos, &numberEnd));
        pos = numberEnd;
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
// Spaces around numbers and operators must not produce tokens.
TEST_METHOD(Should_skip_spaces) {
    const Tokens actual = Lexer::Tokenize(L" 1 + 12.34 ");
    AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, actual);
}
(unconditional → if) , .
// Scan loop: numbers and '+' become tokens; any other character is skipped.
while(*current != L'\0') {
    if(iswdigit(*current)) {
        wchar_t *numberEnd = nullptr;
        result.push_back(wcstod(current, &numberEnd));
        current = numberEnd;
        continue;
    }
    if(*current == static_cast<wchar_t>(Operator::Plus)) {
        result.push_back(Operator::Plus);
    }
    ++current;
}
. . Detail
. Tokenize
.
/// Tokenizes `expr` by running a Detail::Tokenizer over it and returning a copy of its result.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    return scanner.Result();
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// An expression using every operator and both parentheses must be tokenized in order.
TEST_METHOD(Should_tokenize_complex_experssion) {
    const Tokens actual = Lexer::Tokenize(L"1+2*3/(4-5)");
    AssertRange::AreEqual({
        Token(1), Token(Operator::Plus),
        Token(2), Token(Operator::Mul),
        Token(3), Token(Operator::Div),
        Token(Operator::LParen),
        Token(4), Token(Operator::Minus), Token(5),
        Token(Operator::RParen)
    }, actual);
}
Operator
, .
/// Operators and parentheses known to the lexer.
/// Each enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')'
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators and parentheses recognised by the lexer.
/// Each enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the operator as a one-character string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Kinds of token the lexer can produce.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator's name.
/// @throws std::out_of_range for a value that is not a declared enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

/// A lexer token holding either an operator or a number; Type() tells which.
class Token {
public:
    /// Creates an operator token.
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}

    /// Creates a number token.
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    /// Returns the kind of value this token holds.
    TokenType Type() const { return m_type; }

    /// Returns the stored operator.
    /// @throws std::logic_error if this is not an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// Returns the stored number.
    /// @throws std::logic_error if this is not a number token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Two tokens are equal when they have the same type and the same stored value.
    /// Numbers are compared exactly, with ==.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is written by the constructors and read afterwards.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Renders a token as text: numbers via std::to_wstring, operators via ToString(Operator).
/// @throws std::out_of_range if the token's type is not a declared enumerator.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

/// Sequence of tokens produced by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

/// Scans a null-terminated wide string and collects the tokens found in it.
/// The constructor stores a pointer into `expr` rather than copying the text,
/// so `expr` must stay alive and unmodified while Tokenize() runs.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the whole expression: numbers and operators become tokens,
    /// every other character (spaces included) is skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Returns the tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    /// True when the terminating null character has been reached.
    bool EndOfExpression() const { return *m_current == L'\0'; }

    /// True when the current character is a digit.
    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    /// Parses a number with wcstod and resumes scanning right after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    /// True when the current character is one of the Operator enumerators.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    /// Stores the current character as an operator token and moves past it.
    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits `expr` into number and operator tokens, skipping every other character.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

/// Sequence of tokens produced by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Splits an expression into tokens.
/// Not implemented at this step: every call throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . …
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
… . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators and parentheses recognised by the lexer.
/// Each enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the operator as a one-character string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Kinds of token the lexer can produce.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator's name.
/// @throws std::out_of_range for a value that is not a declared enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

/// A lexer token holding either an operator or a number; Type() tells which.
class Token {
public:
    /// Creates an operator token.
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}

    /// Creates a number token.
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    /// Returns the kind of value this token holds.
    TokenType Type() const { return m_type; }

    /// Returns the stored operator.
    /// @throws std::logic_error if this is not an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// Returns the stored number.
    /// @throws std::logic_error if this is not a number token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Two tokens are equal when they have the same type and the same stored value.
    /// Numbers are compared exactly, with ==.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is written by the constructors and read afterwards.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Renders a token as text: numbers via std::to_wstring, operators via ToString(Operator).
/// @throws std::out_of_range if the token's type is not a declared enumerator.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

/// Sequence of tokens produced by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

/// Scans a null-terminated wide string and collects the tokens found in it.
/// The constructor stores a pointer into `expr` rather than copying the text,
/// so `expr` must stay alive and unmodified while Tokenize() runs.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the whole expression: numbers and operators become tokens,
    /// every other character (spaces included) is skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Returns the tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    /// True when the terminating null character has been reached.
    bool EndOfExpression() const { return *m_current == L'\0'; }

    /// True when the current character is a digit.
    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    /// Parses a number with wcstod and resumes scanning right after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    /// True when the current character is one of the Operator enumerators.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    /// Stores the current character as an operator token and moves past it.
    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits `expr` into number and operator tokens, skipping every other character.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

/// Sequence of tokens produced by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Splits an expression into tokens.
/// Not implemented at this step: every call throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . …
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
… . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
/// Operators known to the lexer. Each enumerator's value is the wide character
/// that spells it, so a character and its operator convert with a plain cast.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')'
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once
#include <vector>
#include <wchar.h>
#include <algorithm>
#include <stdexcept>
#include <string>

namespace Interpreter {

/// Operators known to the lexer; each enumerator's value is the wide character
/// that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character string made of the operator's symbol.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Discriminates what a Token holds.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator's name.
/// @throws std::out_of_range for a value that is not a TokenType enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

/// Tagged value that holds either an operator or a number.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// @throws std::logic_error when the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error when the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Formats a number token with std::to_wstring and an operator token as its symbol.
/// @throws std::out_of_range when the token's type is not a TokenType enumerator.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

/// Scans a null-terminated wide string one token at a time: numbers are parsed
/// with wcstod, operator symbols become operator tokens and any other
/// character is skipped.
class Tokenizer {
public:
    /// Keeps a pointer into `expr`'s buffer (no copy is made), so `expr` must
    /// stay alive and unmodified while Tokenize() runs.
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the whole expression, appending recognised tokens to the result.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    // Only ASCII digits start a number. iswdigit may accept other digit
    // characters on some C runtimes; wcstod would then consume nothing and
    // ScanNumber would leave m_current in place, hanging the loop.
    bool IsNumber() const { return *m_current >= L'0' && *m_current <= L'9'; }

    // Parses one number and moves m_current to the first character after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Converts `expr` into its sequence of number and operator tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once
#include <vector>
#include <exception>
#include <string>

namespace Interpreter {

/// Placeholder token type with no members yet.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Not implemented yet: unconditionally throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
/// Ignores `expr` and returns an empty token list.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone '+' must come back as exactly one Plus operator.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
/// The only operator so far; its value is the wide character that spells it.
enum class Operator : wchar_t {
    Plus = L'+'
};

/// For now a token is simply an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
/// Returns the one-character string made of the token's underlying wide character.
inline std::wstring ToString(const Token &token) {
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
/// Empty input gives no tokens; otherwise the first character is returned as
/// the single operator token.
inline Tokens Tokenize(std::wstring expr) {
    if(!expr.empty()) {
        return{ static_cast<Operator>(expr[0]) };
    }
    return Tokens();
}
. . . β¦
.
// A single digit must come back as one number token.
TEST_METHOD(Should_tokenize_single_digit) {
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double must report the Number type.
TEST_METHOD(Should_get_type_for_number_token) {
    const Token numberToken(1.2);
    Assert::AreEqual(TokenType::Number, numberToken.Type());
}
. (constant → scalar) .
/// Token that remembers only which kind of value it was built from.
class Token {
public:
    Token(Operator /*op*/)
        : m_type(TokenType::Operator) {
    }

    Token(double /*num*/)
        : m_type(TokenType::Number) {
    }

    TokenType Type() const {
        return m_type;
    }

private:
    TokenType m_type;
};
β¦ . . . .
.
// An operator token must convert back to the operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    const Token plusToken(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plusToken);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token must convert back to the value it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) {
    const Token numberToken(1.23);
    Assert::AreEqual<double>(1.23, numberToken);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
// Several digits with a decimal point must come back as one number token.
TEST_METHOD(Should_tokenize_floating_point_number) {
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
/// Returns at most one token: nothing for empty input, a number parsed by
/// _wtof when the text starts with a digit, otherwise the first character as
/// an operator.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *const start = expr.c_str();
    if(*start == L'\0') {
        return{};
    }
    if(iswdigit(*start) == 0) {
        return{ static_cast<Operator>(*start) };
    }
    return{ _wtof(start) };
}
. .
// An operator immediately followed by a number must give two tokens.
TEST_METHOD(Should_tokenize_plus_and_number) {
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
/// Collects at most one token into a result list: nothing for empty input, a
/// number parsed by _wtof when the text starts with a digit, otherwise the
/// first character as an operator.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    const wchar_t *const start = expr.c_str();
    if(*start != L'\0') {
        if(iswdigit(*start)) {
            tokens.push_back(_wtof(start));
        }
        else {
            tokens.push_back(static_cast<Operator>(*start));
        }
    }
    return tokens;
}
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once
#include <vector>
#include <wchar.h>
#include <algorithm>
#include <stdexcept>
#include <string>

namespace Interpreter {

/// Operators known to the lexer; each enumerator's value is the wide character
/// that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character string made of the operator's symbol.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Discriminates what a Token holds.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator's name.
/// @throws std::out_of_range for a value that is not a TokenType enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

/// Tagged value that holds either an operator or a number.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// @throws std::logic_error when the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error when the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Formats a number token with std::to_wstring and an operator token as its symbol.
/// @throws std::out_of_range when the token's type is not a TokenType enumerator.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

/// Scans a null-terminated wide string one token at a time: numbers are parsed
/// with wcstod, operator symbols become operator tokens and any other
/// character is skipped.
class Tokenizer {
public:
    /// Keeps a pointer into `expr`'s buffer (no copy is made), so `expr` must
    /// stay alive and unmodified while Tokenize() runs.
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the whole expression, appending recognised tokens to the result.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    // Only ASCII digits start a number. iswdigit may accept other digit
    // characters on some C runtimes; wcstod would then consume nothing and
    // ScanNumber would leave m_current in place, hanging the loop.
    bool IsNumber() const { return *m_current >= L'0' && *m_current <= L'9'; }

    // Parses one number and moves m_current to the first character after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Converts `expr` into its sequence of number and operator tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once
#include <vector>
#include <exception>
#include <string>

namespace Interpreter {

/// Placeholder token type with no members yet.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Not implemented yet: unconditionally throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} β nil) , . (nil β constant) . (constant β constant+) ( , ). (constant β scalar) , . (statement β statements) (break, continue, return ). (unconditional β if) . (scalar β array) / . (array β container) . (statement β recursion) . (if β while) . (expression β function) . (variable β assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators the lexer recognises; each value is the wide character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns a one-character string holding the operator's symbol.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Discriminates which member of Token's union is active.
enum class TokenType { Operator, Number };

// Returns the enumerator's name; throws std::out_of_range for an unknown value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator: return L"Operator";
        case TokenType::Number: return L"Number";
        default: throw std::out_of_range("TokenType");
    }
}

// A lexer token: either an operator or a number.
// The constructors are deliberately implicit: Tokenizer pushes raw Operator and
// double values into a Tokens vector and relies on the conversion.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Throws std::logic_error unless this is an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error unless this is a number token.
    operator double() const {
        if(m_type != TokenType::Number) throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator: return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number: return left.m_number == right.m_number;
                default: throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is ever written or read.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Renders the token's payload as text; throws std::out_of_range for an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number: return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator: return ToString(static_cast<Operator>(token));
        default: throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Scanner that walks a null-terminated wide string and collects tokens.
// It keeps a raw pointer into the string given to the constructor, so that
// string must outlive the Tokenizer.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the whole expression; characters that are neither digits nor operators are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            } else if(IsOperator()) {
                ScanOperator();
            } else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses a number at the cursor; wcstod reports where it ended, so the cursor jumps there.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits the expression into number and operator tokens, returned by value.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type with no content yet.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub: unconditionally throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
// Simplest possible implementation: every input yields an empty token list.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone "+" is tokenized as a single Plus operator.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    AssertRange::AreEqual({ Operator::Plus }, Lexer::Tokenize(L"+"));
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// The only operator so far; its value is the wide character that spells it.
enum class Operator : wchar_t {
    Plus = L'+'
};

// For now a token is simply an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
// Returns a one-character string holding the token's underlying wide character.
inline std::wstring ToString(const Token &token) {
    return std::wstring(1, static_cast<wchar_t>(token));
}
, , . , (unconditional → if).
// Empty input yields no tokens; otherwise the first character becomes the only (operator) token.
inline Tokens Tokenize(std::wstring expr) {
    return expr.empty() ? Tokens{} : Tokens{ static_cast<Operator>(expr[0]) };
}
. . . β¦
.
// A single digit is tokenized as one number token.
TEST_METHOD(Should_tokenize_single_digit) {
    AssertRange::AreEqual({ 1.0 }, Lexer::Tokenize(L"1"));
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double reports TokenType::Number.
TEST_METHOD(Should_get_type_for_number_token) {
    Assert::AreEqual(TokenType::Number, Token(1.2).Type());
}
. (constant → scalar) .
// Token remembers which kind it was constructed as; the payload itself is not stored yet.
class Token {
public:
    Token(Operator) : m_type(TokenType::Operator) {}
    Token(double) : m_type(TokenType::Number) {}

    TokenType Type() const {
        return m_type;
    }

private:
    TokenType m_type;
};
β¦ . . . .
.
// An operator token converts back to the operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    Assert::AreEqual<Operator>(Operator::Plus, Token(Operator::Plus));
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token converts back to the double it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) {
    Assert::AreEqual<double>(1.23, Token(1.23));
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
// A multi-digit number with a fractional part is tokenized as one number token.
TEST_METHOD(Should_tokenize_floating_point_number) {
    AssertRange::AreEqual({ 12.34 }, Lexer::Tokenize(L"12.34"));
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
// Produces at most one token from the start of the expression: a number when it
// begins with a digit, otherwise an operator built from the first character.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *const first = expr.c_str();
    if(*first == L'\0') {
        return{};
    }
    if(!iswdigit(*first)) {
        return{ static_cast<Operator>(*first) };
    }
    return{ _wtof(first) };
}
. .
// An operator immediately followed by a number yields two tokens.
TEST_METHOD(Should_tokenize_plus_and_number) {
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, Lexer::Tokenize(L"+12.34"));
}
, . , . . result
, .
// Same single-token behavior as before, but the token is now accumulated in a
// list instead of being returned directly.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    const wchar_t *const first = expr.c_str();
    if(*first != L'\0') {
        if(iswdigit(*first)) {
            tokens.push_back(_wtof(first));
        } else {
            tokens.push_back(static_cast<Operator>(*first));
        }
    }
    return tokens;
}
: (if → while). , .
// Splits the expression into tokens: a run starting with a digit is parsed as a
// number by wcstod, every other character becomes an operator token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    for(const wchar_t *cursor = expr.c_str(); *cursor != L'\0';) {
        if(!iswdigit(*cursor)) {
            tokens.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        // wcstod reports where the number ended, so scanning resumes right after it.
        wchar_t *numberEnd = nullptr;
        tokens.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
// Spaces around and between tokens must not produce tokens of their own.
TEST_METHOD(Should_skip_spaces) {
    AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, Lexer::Tokenize(L" 1 + 12.34 "));
}
(unconditional → if) , .
// Scanning loop: numbers and '+' yield tokens, any other character is skipped.
while(*current != L'\0') {
    if(iswdigit(*current)) {
        wchar_t *numberEnd = nullptr;
        result.push_back(wcstod(current, &numberEnd));
        current = numberEnd;
        continue;
    }
    if(*current == static_cast<wchar_t>(Operator::Plus)) {
        result.push_back(static_cast<Operator>(*current));
    }
    ++current;
}
. . Detail
. Tokenize
.
// Public entry point: runs a Detail::Tokenizer over the expression and returns
// a copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    return scanner.Result();
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// An expression using every operator and parentheses is split into the expected token sequence.
TEST_METHOD(Should_tokenize_complex_experssion) {
    AssertRange::AreEqual({
        Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div),
        Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen)
    }, Lexer::Tokenize(L"1+2*3/(4-5)"));
}
Operator
, .
// Operators the lexer recognises; each value is the wide character that spells it.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')'
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators the lexer recognises; each value is the wide character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns a one-character string holding the operator's symbol.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Discriminates which member of Token's union is active.
enum class TokenType { Operator, Number };

// Returns the enumerator's name; throws std::out_of_range for an unknown value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator: return L"Operator";
        case TokenType::Number: return L"Number";
        default: throw std::out_of_range("TokenType");
    }
}

// A lexer token: either an operator or a number.
// The constructors are deliberately implicit: Tokenizer pushes raw Operator and
// double values into a Tokens vector and relies on the conversion.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Throws std::logic_error unless this is an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error unless this is a number token.
    operator double() const {
        if(m_type != TokenType::Number) throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator: return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number: return left.m_number == right.m_number;
                default: throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is ever written or read.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Renders the token's payload as text; throws std::out_of_range for an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number: return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator: return ToString(static_cast<Operator>(token));
        default: throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Scanner that walks a null-terminated wide string and collects tokens.
// It keeps a raw pointer into the string given to the constructor, so that
// string must outlive the Tokenizer.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the whole expression; characters that are neither digits nor operators are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            } else if(IsOperator()) {
                ScanOperator();
            } else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses a number at the cursor; wcstod reports where it ended, so the cursor jumps there.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits the expression into number and operator tokens, returned by value.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type with no content yet.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub: unconditionally throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
// Simplest possible implementation: every input yields an empty token list.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone "+" is tokenized as a single Plus operator.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    AssertRange::AreEqual({ Operator::Plus }, Lexer::Tokenize(L"+"));
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// The only operator so far; its value is the wide character that spells it.
enum class Operator : wchar_t {
    Plus = L'+'
};

// For now a token is simply an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
// Returns a one-character string holding the token's underlying wide character.
inline std::wstring ToString(const Token &token) {
    return std::wstring(1, static_cast<wchar_t>(token));
}
, , . , (unconditional → if).
// Empty input yields no tokens; otherwise the first character becomes the only (operator) token.
inline Tokens Tokenize(std::wstring expr) {
    return expr.empty() ? Tokens{} : Tokens{ static_cast<Operator>(expr[0]) };
}
. . . β¦
.
// A single digit is tokenized as one number token.
TEST_METHOD(Should_tokenize_single_digit) {
    AssertRange::AreEqual({ 1.0 }, Lexer::Tokenize(L"1"));
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double reports TokenType::Number.
TEST_METHOD(Should_get_type_for_number_token) {
    Assert::AreEqual(TokenType::Number, Token(1.2).Type());
}
. (constant → scalar) .
// Token remembers which kind it was constructed as; the payload itself is not stored yet.
class Token {
public:
    Token(Operator) : m_type(TokenType::Operator) {}
    Token(double) : m_type(TokenType::Number) {}

    TokenType Type() const {
        return m_type;
    }

private:
    TokenType m_type;
};
β¦ . . . .
.
// An operator token converts back to the operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    Assert::AreEqual<Operator>(Operator::Plus, Token(Operator::Plus));
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token converts back to the double it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) {
    Assert::AreEqual<double>(1.23, Token(1.23));
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
// A multi-digit number with a fractional part is tokenized as one number token.
TEST_METHOD(Should_tokenize_floating_point_number) {
    AssertRange::AreEqual({ 12.34 }, Lexer::Tokenize(L"12.34"));
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
// Produces at most one token from the start of the expression: a number when it
// begins with a digit, otherwise an operator built from the first character.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *const first = expr.c_str();
    if(*first == L'\0') {
        return{};
    }
    if(!iswdigit(*first)) {
        return{ static_cast<Operator>(*first) };
    }
    return{ _wtof(first) };
}
. .
// An operator immediately followed by a number yields two tokens.
TEST_METHOD(Should_tokenize_plus_and_number) {
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, Lexer::Tokenize(L"+12.34"));
}
, . , . . result
, .
// Same single-token behavior as before, but the token is now accumulated in a
// list instead of being returned directly.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    const wchar_t *const first = expr.c_str();
    if(*first != L'\0') {
        if(iswdigit(*first)) {
            tokens.push_back(_wtof(first));
        } else {
            tokens.push_back(static_cast<Operator>(*first));
        }
    }
    return tokens;
}
: (if → while). , .
// Splits the expression into tokens: a run starting with a digit is parsed as a
// number by wcstod, every other character becomes an operator token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    for(const wchar_t *cursor = expr.c_str(); *cursor != L'\0';) {
        if(!iswdigit(*cursor)) {
            tokens.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        // wcstod reports where the number ended, so scanning resumes right after it.
        wchar_t *numberEnd = nullptr;
        tokens.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
// Spaces around and between tokens must not produce tokens of their own.
TEST_METHOD(Should_skip_spaces) {
    AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, Lexer::Tokenize(L" 1 + 12.34 "));
}
(unconditional → if) , .
// Scanning loop: numbers and '+' yield tokens, any other character is skipped.
while(*current != L'\0') {
    if(iswdigit(*current)) {
        wchar_t *numberEnd = nullptr;
        result.push_back(wcstod(current, &numberEnd));
        current = numberEnd;
        continue;
    }
    if(*current == static_cast<wchar_t>(Operator::Plus)) {
        result.push_back(static_cast<Operator>(*current));
    }
    ++current;
}
. . Detail
. Tokenize
.
// Public entry point: runs a Detail::Tokenizer over the expression and returns
// a copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    return scanner.Result();
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// An expression using every operator and parentheses is split into the expected token sequence.
TEST_METHOD(Should_tokenize_complex_experssion) {
    AssertRange::AreEqual({
        Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div),
        Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen)
    }, Lexer::Tokenize(L"1+2*3/(4-5)"));
}
Operator
, .
// Operators the lexer recognises; each value is the wide character that spells it.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')'
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators the lexer recognises; each value is the wide character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns a one-character string holding the operator's symbol.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Discriminates which member of Token's union is active.
enum class TokenType { Operator, Number };

// Returns the enumerator's name; throws std::out_of_range for an unknown value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator: return L"Operator";
        case TokenType::Number: return L"Number";
        default: throw std::out_of_range("TokenType");
    }
}

// A lexer token: either an operator or a number.
// The constructors are deliberately implicit: Tokenizer pushes raw Operator and
// double values into a Tokens vector and relies on the conversion.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Throws std::logic_error unless this is an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error unless this is a number token.
    operator double() const {
        if(m_type != TokenType::Number) throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator: return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number: return left.m_number == right.m_number;
                default: throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is ever written or read.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Renders the token's payload as text; throws std::out_of_range for an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number: return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator: return ToString(static_cast<Operator>(token));
        default: throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Scanner that walks a null-terminated wide string and collects tokens.
// It keeps a raw pointer into the string given to the constructor, so that
// string must outlive the Tokenizer.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the whole expression; characters that are neither digits nor operators are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            } else if(IsOperator()) {
                ScanOperator();
            } else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses a number at the cursor; wcstod reports where it ended, so the cursor jumps there.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits the expression into number and operator tokens, returned by value.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data yet.
struct Token {};

using Tokens = std::vector<Token>;

namespace Lexer {

// First stub of the lexer entry point: it compiles and links, but always
// throws std::exception so that the very first test fails.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// Operators known to the lexer; an enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+'
};

// At this stage a token is simply an operator.
using Token = Operator;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
// Operators and parentheses known to the lexer.
// Each enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')'
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>

namespace Interpreter {

// Operators and parentheses known to the lexer. Each enumerator's value is the
// character that spells it, so a character casts directly to an Operator.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Renders an operator as the one-character string that spells it.
inline std::wstring ToString(const Operator &op) {
    return std::wstring(1, static_cast<wchar_t>(op));
}

// Discriminates the two kinds of value a Token can hold.
enum class TokenType { Operator, Number };

// Renders a token type by name; throws std::out_of_range for an unknown value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator: return L"Operator";
        case TokenType::Number: return L"Number";
        default: throw std::out_of_range("TokenType");
    }
}

// Value produced by the lexer: either an Operator or a number, tagged by TokenType.
class Token {
public:
    // Both constructors are intentionally implicit: the lexer and the tests
    // build tokens directly from operators and numbers.
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    // Tells which member of the union is active.
    TokenType Type() const { return m_type; }

    // Returns the stored operator; throws std::logic_error for a number token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number; throws std::logic_error for an operator token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    // Numbers are compared exactly, with no tolerance.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type != right.m_type)
            return false;
        switch(left.m_type) {
            case TokenType::Operator: return left.m_operator == right.m_operator;
            case TokenType::Number: return left.m_number == right.m_number;
            default: throw std::out_of_range("TokenType");
        }
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Renders a token as text; throws std::out_of_range for an unknown token type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number: return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator: return ToString(static_cast<Operator>(token));
        default: throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Single-pass scanner that turns an expression string into Tokens.
// Numbers and operators become tokens; every other character is skipped.
class Tokenizer {
public:
    // Starts at the first character of expr. Only a pointer into expr is kept,
    // so expr must stay alive and unmodified while the tokenizer is in use.
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the whole expression, appending tokens to the result.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            } else if(IsOperator()) {
                ScanOperator();
            } else {
                MoveNext();
            }
        }
    }

    // Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    // Only ASCII '0'..'9' start a number. iswdigit may accept other digit
    // characters on some C runtimes; wcstod would consume nothing for those and
    // the scan loop would never advance.
    bool IsNumber() const { return *m_current >= L'0' && *m_current <= L'9'; }

    // wcstod parses the number and reports where it stopped; scanning resumes there.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        const auto all = { Operator::Plus, Operator::Minus, Operator::Mul,
                           Operator::Div, Operator::LParen, Operator::RParen };
        const wchar_t symbol = *m_current;
        return std::any_of(all.begin(), all.end(),
                           [symbol](Operator o) { return symbol == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits expr into number and operator tokens, skipping any other character.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data yet.
struct Token {};

using Tokens = std::vector<Token>;

namespace Lexer {

// First stub of the lexer entry point: it compiles and links, but always
// throws std::exception so that the very first test fails.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// Operators known to the lexer; an enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+'
};

// At this stage a token is simply an operator.
using Token = Operator;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
// Operators and parentheses known to the lexer.
// Each enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')'
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>

namespace Interpreter {

// Operators and parentheses known to the lexer. Each enumerator's value is the
// character that spells it, so a character casts directly to an Operator.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Renders an operator as the one-character string that spells it.
inline std::wstring ToString(const Operator &op) {
    return std::wstring(1, static_cast<wchar_t>(op));
}

// Discriminates the two kinds of value a Token can hold.
enum class TokenType { Operator, Number };

// Renders a token type by name; throws std::out_of_range for an unknown value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator: return L"Operator";
        case TokenType::Number: return L"Number";
        default: throw std::out_of_range("TokenType");
    }
}

// Value produced by the lexer: either an Operator or a number, tagged by TokenType.
class Token {
public:
    // Both constructors are intentionally implicit: the lexer and the tests
    // build tokens directly from operators and numbers.
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    // Tells which member of the union is active.
    TokenType Type() const { return m_type; }

    // Returns the stored operator; throws std::logic_error for a number token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number; throws std::logic_error for an operator token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    // Numbers are compared exactly, with no tolerance.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type != right.m_type)
            return false;
        switch(left.m_type) {
            case TokenType::Operator: return left.m_operator == right.m_operator;
            case TokenType::Number: return left.m_number == right.m_number;
            default: throw std::out_of_range("TokenType");
        }
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Renders a token as text; throws std::out_of_range for an unknown token type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number: return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator: return ToString(static_cast<Operator>(token));
        default: throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Single-pass scanner that turns an expression string into Tokens.
// Numbers and operators become tokens; every other character is skipped.
class Tokenizer {
public:
    // Starts at the first character of expr. Only a pointer into expr is kept,
    // so expr must stay alive and unmodified while the tokenizer is in use.
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the whole expression, appending tokens to the result.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            } else if(IsOperator()) {
                ScanOperator();
            } else {
                MoveNext();
            }
        }
    }

    // Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    // Only ASCII '0'..'9' start a number. iswdigit may accept other digit
    // characters on some C runtimes; wcstod would consume nothing for those and
    // the scan loop would never advance.
    bool IsNumber() const { return *m_current >= L'0' && *m_current <= L'9'; }

    // wcstod parses the number and reports where it stopped; scanning resumes there.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        const auto all = { Operator::Plus, Operator::Minus, Operator::Mul,
                           Operator::Div, Operator::LParen, Operator::RParen };
        const wchar_t symbol = *m_current;
        return std::any_of(all.begin(), all.end(),
                           [symbol](Operator o) { return symbol == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits expr into number and operator tokens, skipping any other character.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data yet.
struct Token {};

using Tokens = std::vector<Token>;

namespace Lexer {

// First stub of the lexer entry point: it compiles and links, but always
// throws std::exception so that the very first test fails.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// Operators known to the lexer; an enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+'
};

// At this stage a token is simply an operator.
using Token = Operator;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
/// Operator and parenthesis codes. The underlying type is wchar_t and each
/// enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>

namespace Interpreter {

/// Operators and parentheses recognised by the lexer. The underlying type is
/// wchar_t and each enumerator equals the character that spells it, so a
/// scanned character converts to an Operator with a static_cast.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character spelling of `op`.
inline std::wstring ToString(const Operator &op) {
    return std::wstring(1, static_cast<wchar_t>(op));
}

/// Tells which kind of payload a Token holds.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator name of `type`.
/// @throws std::out_of_range if `type` is not a declared enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

/// A lexeme: either an Operator or a double, tagged with its TokenType.
/// Constructors and conversion operators are implicit; the tokenizer relies
/// on that when it pushes a double or an Operator into Tokens.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// @throws std::logic_error if the token holds a number.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error if the token holds an operator.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when both type and payload match; numbers are
    /// compared with exact double equality.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is ever written or read.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Textual form of a token: std::to_wstring for numbers, the operator
/// character for operators.
/// @throws std::out_of_range if the token type is not a declared enumerator.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

/// Single-pass scanner that turns an expression into Tokens.
/// It keeps a raw pointer into the buffer of the string passed to the
/// constructor, so that string must stay alive while Tokenize() runs.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Scans to the terminating L'\0'. Characters that start neither a number
    /// nor an operator (spaces, for example) are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    // Explicit ASCII range instead of iswdigit: a classifier that accepts a
    // character wcstod cannot parse would leave ScanNumber without progress.
    bool IsNumber() const { return *m_current >= L'0' && *m_current <= L'9'; }

    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // Nothing was consumed; step over the character so the loop ends.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Converts `expr` into a flat list of number and operator tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

/// Stub that only lets the first test compile: it ignores `expr` and always
/// throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
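({} → nil) From no code at all to code that employs nil. (nil → constant) From nil to a constant. (constant → constant+) From a simple constant to a more complex one. (constant → scalar) Replacing a constant with a variable or an argument. (statement → statements) Adding more unconditional statements (break, continue, return, and the like). (unconditional → if) Splitting the execution path. (scalar → array) From a variable/argument to an array. (array → container) From an array to a more complex container. (statement → recursion) Replacing a statement with recursion. (if → while) Replacing a conditional with a loop. (expression → function) Replacing an expression with a function or algorithm. (variable → assignment) Replacing the value of a variable.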
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
/// Operator codes; the enumerator value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
};

/// At this step a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
/// Returns no tokens for an empty expression; otherwise a single token made
/// from the first character reinterpreted as an Operator code.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    if(!expr.empty()) {
        tokens.push_back(static_cast<Operator>(expr[0]));
    }
    return tokens;
}
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
/// Splits `expr` into tokens by walking its characters once.
/// A run starting with a digit is parsed by wcstod and stored as a number;
/// every other character is stored verbatim as an Operator code.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    for(const wchar_t *cursor = expr.c_str(); *cursor != L'\0'; ) {
        if(!iswdigit(*cursor)) {
            tokens.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        // wcstod reports through its second argument where the number ended.
        wchar_t *numberEnd = nullptr;
        tokens.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
/// Lexer entry point: runs a Detail::Tokenizer over `expr` and returns a copy
/// of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    const Tokens &scanned = scanner.Result();
    return scanned;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
/// Operator and parenthesis codes. The underlying type is wchar_t and each
/// enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>

namespace Interpreter {

/// Operators and parentheses recognised by the lexer. The underlying type is
/// wchar_t and each enumerator equals the character that spells it, so a
/// scanned character converts to an Operator with a static_cast.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character spelling of `op`.
inline std::wstring ToString(const Operator &op) {
    return std::wstring(1, static_cast<wchar_t>(op));
}

/// Tells which kind of payload a Token holds.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator name of `type`.
/// @throws std::out_of_range if `type` is not a declared enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

/// A lexeme: either an Operator or a double, tagged with its TokenType.
/// Constructors and conversion operators are implicit; the tokenizer relies
/// on that when it pushes a double or an Operator into Tokens.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// @throws std::logic_error if the token holds a number.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error if the token holds an operator.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when both type and payload match; numbers are
    /// compared with exact double equality.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is ever written or read.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Textual form of a token: std::to_wstring for numbers, the operator
/// character for operators.
/// @throws std::out_of_range if the token type is not a declared enumerator.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

/// Single-pass scanner that turns an expression into Tokens.
/// It keeps a raw pointer into the buffer of the string passed to the
/// constructor, so that string must stay alive while Tokenize() runs.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Scans to the terminating L'\0'. Characters that start neither a number
    /// nor an operator (spaces, for example) are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    // Explicit ASCII range instead of iswdigit: a classifier that accepts a
    // character wcstod cannot parse would leave ScanNumber without progress.
    bool IsNumber() const { return *m_current >= L'0' && *m_current <= L'9'; }

    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // Nothing was consumed; step over the character so the loop ends.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Converts `expr` into a flat list of number and operator tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

/// Stub that only lets the first test compile: it ignores `expr` and always
/// throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
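({} → nil) From no code at all to code that employs nil. (nil → constant) From nil to a constant. (constant → constant+) From a simple constant to a more complex one. (constant → scalar) Replacing a constant with a variable or an argument. (statement → statements) Adding more unconditional statements (break, continue, return, and the like). (unconditional → if) Splitting the execution path. (scalar → array) From a variable/argument to an array. (array → container) From an array to a more complex container. (statement → recursion) Replacing a statement with recursion. (if → while) Replacing a conditional with a loop. (expression → function) Replacing an expression with a function or algorithm. (variable → assignment) Replacing the value of a variable.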
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
/// Operator codes; the enumerator value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
};

/// At this step a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
/// Returns no tokens for an empty expression; otherwise a single token made
/// from the first character reinterpreted as an Operator code.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    if(!expr.empty()) {
        tokens.push_back(static_cast<Operator>(expr[0]));
    }
    return tokens;
}
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
/// Splits `expr` into tokens by walking its characters once.
/// A run starting with a digit is parsed by wcstod and stored as a number;
/// every other character is stored verbatim as an Operator code.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    for(const wchar_t *cursor = expr.c_str(); *cursor != L'\0'; ) {
        if(!iswdigit(*cursor)) {
            tokens.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        // wcstod reports through its second argument where the number ended.
        wchar_t *numberEnd = nullptr;
        tokens.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
/// Lexer entry point: runs a Detail::Tokenizer over `expr` and returns a copy
/// of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    const Tokens &scanned = scanner.Result();
    return scanned;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
/// Operator and parenthesis codes. The underlying type is wchar_t and each
/// enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <wchar.h>
#include <wctype.h>

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>

namespace Interpreter {

/// Operators the lexer recognises. Each enumerator's value is the wide character that spells
/// it, so a character and its operator convert to each other with a static_cast.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character string that spells the operator.
inline std::wstring ToString(const Operator &op) {
    return { static_cast<wchar_t>(op) };
}

/// Discriminates what a Token holds.
enum class TokenType { Operator, Number };

/// Returns the enumerator name; throws std::out_of_range for a value outside the enum.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator: return L"Operator";
    case TokenType::Number: return L"Number";
    default: throw std::out_of_range("TokenType");
    }
}

/// A lexer token: either an operator or a number. m_type records which union member is valid.
/// The constructors are intentionally implicit so an Operator or a double can be pushed into
/// a Tokens container directly.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// Yields the operator; throws std::logic_error when the token holds a number.
    operator Operator() const {
        if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// Yields the number; throws std::logic_error when the token holds an operator.
    operator double() const {
        if(m_type != TokenType::Number) throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator: return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number: return left.m_number == right.m_number;
            default: throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Formats a token: std::to_wstring for numbers, the operator character for operators.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number: return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator: return ToString(static_cast<Operator>(token));
    default: throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

/// Scanner over a null-terminated wide string. It stores only a pointer into the string passed
/// to the constructor, so that string must outlive the tokenizer.
class Tokenizer {
public:
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the whole expression; characters that start neither a number nor an operator
    /// are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            } else if(IsOperator()) {
                ScanOperator();
            } else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // wcstod reports where the number ended; scanning resumes from that position.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits an expression into number and operator tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

/// Stub: throws std::exception unconditionally; the argument is not examined.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
/// Returns no tokens for an empty expression; otherwise a single operator token built from the
/// first character.
inline Tokens Tokenize(std::wstring expr) {
    if(!expr.empty()) {
        const Operator first = static_cast<Operator>(expr[0]);
        return{ first };
    }
    return{};
}
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <wchar.h>
#include <wctype.h>

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>

namespace Interpreter {

/// Operators the lexer recognises. Each enumerator's value is the wide character that spells
/// it, so a character and its operator convert to each other with a static_cast.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character string that spells the operator.
inline std::wstring ToString(const Operator &op) {
    return { static_cast<wchar_t>(op) };
}

/// Discriminates what a Token holds.
enum class TokenType { Operator, Number };

/// Returns the enumerator name; throws std::out_of_range for a value outside the enum.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator: return L"Operator";
    case TokenType::Number: return L"Number";
    default: throw std::out_of_range("TokenType");
    }
}

/// A lexer token: either an operator or a number. m_type records which union member is valid.
/// The constructors are intentionally implicit so an Operator or a double can be pushed into
/// a Tokens container directly.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// Yields the operator; throws std::logic_error when the token holds a number.
    operator Operator() const {
        if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// Yields the number; throws std::logic_error when the token holds an operator.
    operator double() const {
        if(m_type != TokenType::Number) throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator: return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number: return left.m_number == right.m_number;
            default: throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Formats a token: std::to_wstring for numbers, the operator character for operators.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number: return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator: return ToString(static_cast<Operator>(token));
    default: throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

/// Scanner over a null-terminated wide string. It stores only a pointer into the string passed
/// to the constructor, so that string must outlive the tokenizer.
class Tokenizer {
public:
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the whole expression; characters that start neither a number nor an operator
    /// are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            } else if(IsOperator()) {
                ScanOperator();
            } else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // wcstod reports where the number ended; scanning resumes from that position.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits an expression into number and operator tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

/// Stub: throws std::exception unconditionally; the argument is not examined.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <wchar.h>
#include <wctype.h>

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>

namespace Interpreter {

/// Operators the lexer recognises. Each enumerator's value is the wide character that spells
/// it, so a character and its operator convert to each other with a static_cast.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character string that spells the operator.
inline std::wstring ToString(const Operator &op) {
    return { static_cast<wchar_t>(op) };
}

/// Discriminates what a Token holds.
enum class TokenType { Operator, Number };

/// Returns the enumerator name; throws std::out_of_range for a value outside the enum.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator: return L"Operator";
    case TokenType::Number: return L"Number";
    default: throw std::out_of_range("TokenType");
    }
}

/// A lexer token: either an operator or a number. m_type records which union member is valid.
/// The constructors are intentionally implicit so an Operator or a double can be pushed into
/// a Tokens container directly.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// Yields the operator; throws std::logic_error when the token holds a number.
    operator Operator() const {
        if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// Yields the number; throws std::logic_error when the token holds an operator.
    operator double() const {
        if(m_type != TokenType::Number) throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator: return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number: return left.m_number == right.m_number;
            default: throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Formats a token: std::to_wstring for numbers, the operator character for operators.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number: return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator: return ToString(static_cast<Operator>(token));
    default: throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

/// Scanner over a null-terminated wide string. It stores only a pointer into the string passed
/// to the constructor, so that string must outlive the tokenizer.
class Tokenizer {
public:
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the whole expression; characters that start neither a number nor an operator
    /// are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            } else if(IsOperator()) {
                ScanOperator();
            } else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // wcstod reports where the number ended; scanning resumes from that position.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits an expression into number and operator tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data at this stage.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub that exists only so the first test compiles.
// Always throws std::exception; `expr` is not inspected.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
// Minimal implementation: the input is ignored and an empty token list is
// returned.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone "+" must yield exactly one token, Operator::Plus.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// Operators known to the lexer. Each enumerator's value is the wide character
// that spells the operator.
enum class Operator : wchar_t {
    Plus = L'+',
};

// At this stage a token is nothing more than an operator code.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
// Renders a token as a one-character string holding its wchar_t value.
inline std::wstring ToString(const Token &token) {
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
// Returns no tokens for an empty expression; otherwise a single token built
// from the first character, interpreted as an operator code.
inline Tokens Tokenize(std::wstring expr) {
    if(!expr.empty()) {
        return{ static_cast<Operator>(expr[0]) };
    }
    return{};
}
. . . …
.
// A single digit must yield one number token with that value.
TEST_METHOD(Should_tokenize_single_digit) {
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double must report TokenType::Number.
TEST_METHOD(Should_get_type_for_number_token) {
    const Token number(1.2);
    Assert::AreEqual(TokenType::Number, number.Type());
}
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
… . . . .
.
// An operator token must convert back to the operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    const Token plus(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plus);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token must convert back to the value it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) {
    const Token number(1.23);
    Assert::AreEqual<double>(1.23, number);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
// A multi-digit number with a fractional part must yield one number token.
TEST_METHOD(Should_tokenize_floating_point_number) {
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
// Produces at most one token: nothing for an empty expression, a number when
// the text starts with a digit, otherwise the first character as an operator.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *current = expr.c_str();
    if(*current == L'\0') {
        return{};
    }
    if(!iswdigit(*current)) {
        return{ static_cast<Operator>(*current) };
    }
    return{ _wtof(current) };
}
. .
// An operator immediately followed by a number must yield two tokens.
TEST_METHOD(Should_tokenize_plus_and_number) {
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
// Same single-token behaviour as before, restructured to accumulate into a
// result list in preparation for scanning several tokens.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    const wchar_t *current = expr.c_str();
    if(*current != L'\0') {
        if(iswdigit(*current)) {
            result.push_back(_wtof(current));
        } else {
            result.push_back(static_cast<Operator>(*current));
        }
    }
    return result;
}
: (if → while). , .
// Scans the whole expression: a digit starts a number parsed by wcstod, whose
// end pointer becomes the new position; any other character is emitted as an
// operator token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    for(const wchar_t *current = expr.c_str(); *current != L'\0'; ) {
        if(!iswdigit(*current)) {
            result.push_back(static_cast<Operator>(*current));
            ++current;
            continue;
        }
        wchar_t *end = nullptr;
        result.push_back(wcstod(current, &end));
        current = end;
    }
    return result;
}
wcstod
, _wtof
, . , . , .
. .
.
// Spaces around and between tokens must not produce tokens of their own.
TEST_METHOD(Should_skip_spaces) {
    const Tokens actual = Lexer::Tokenize(L" 1 + 12.34 ");
    AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, actual);
}
(unconditional → if) , .
// Main scanning loop: numbers are parsed with wcstod, a '+' is emitted as an
// operator token, and every other character is skipped.
while(*current != L'\0') {
    if(iswdigit(*current)) {
        wchar_t *end = nullptr;
        result.push_back(wcstod(current, &end));
        current = end;
        continue;
    }
    if(*current == static_cast<wchar_t>(Operator::Plus)) {
        result.push_back(static_cast<Operator>(*current));
    }
    ++current;
}
. . Detail
. Tokenize
.
// Entry point of the lexer: runs a Detail::Tokenizer over `expr` and returns a
// copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    Tokens tokens = scanner.Result();
    return tokens;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// Every supported operator and parenthesis, mixed with numbers, must come out
// in source order.
TEST_METHOD(Should_tokenize_complex_experssion) {
    const Tokens actual = Lexer::Tokenize(L"1+2*3/(4-5)");
    AssertRange::AreEqual({
        Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3),
        Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus),
        Token(5), Token(Operator::RParen)
    }, actual);
}
Operator
, .
// Operators and parentheses known to the lexer. Each enumerator's value is the
// wide character that spells it.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators and parentheses known to the lexer. Each enumerator's value is the
// wide character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// One-character string holding the operator's symbol.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

enum class TokenType {
    Operator,
    Number
};

// Name of the token kind. Throws std::out_of_range for a value outside the enum.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// A lexer token holding either an operator code or a number. m_type records
// which member of the anonymous union is active. The constructors and
// conversion operators are deliberately implicit.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Returns the stored operator.
    // Throws std::logic_error if this is not an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number.
    // Throws std::logic_error if this is not a number token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same kind and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token. Throws std::out_of_range for an unknown token kind.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

// Splits an expression into tokens in one left-to-right pass.
// Only a pointer into the caller's string is stored, so `expr` must stay alive
// and unmodified for as long as the Tokenizer is used.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the expression up to its terminating L'\0'. Characters that are
    // neither part of a number nor an operator are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            } else if(IsOperator()) {
                ScanOperator();
            } else {
                MoveNext();
            }
        }
    }

    // Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses a number starting at the current position and advances past it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // wcstod consumed nothing: skip the character so the scan loop
            // always makes progress instead of spinning forever.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul,
                     Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(),
            [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current; // next unread character of the expression
    Tokens m_result;
};

} // namespace Detail

// Entry point of the lexer: returns the tokens found in `expr`, in source order.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data at this stage.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub that exists only so the first test compiles.
// Always throws std::exception; `expr` is not inspected.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// Operators known to the lexer. Each enumerator's value is the wide character
// that spells the operator.
enum class Operator : wchar_t {
    Plus = L'+',
};

// At this stage a token is nothing more than an operator code.
using Token = Operator;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . …
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
… . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
// Operators and parentheses known to the lexer. Each enumerator's value is the
// wide character that spells it.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators and parentheses known to the lexer. Each enumerator's value is the
// wide character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// One-character string holding the operator's symbol.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

enum class TokenType {
    Operator,
    Number
};

// Name of the token kind. Throws std::out_of_range for a value outside the enum.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// A lexer token holding either an operator code or a number. m_type records
// which member of the anonymous union is active. The constructors and
// conversion operators are deliberately implicit.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Returns the stored operator.
    // Throws std::logic_error if this is not an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number.
    // Throws std::logic_error if this is not a number token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same kind and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token. Throws std::out_of_range for an unknown token kind.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

// Splits an expression into tokens in one left-to-right pass.
// Only a pointer into the caller's string is stored, so `expr` must stay alive
// and unmodified for as long as the Tokenizer is used.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the expression up to its terminating L'\0'. Characters that are
    // neither part of a number nor an operator are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            } else if(IsOperator()) {
                ScanOperator();
            } else {
                MoveNext();
            }
        }
    }

    // Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses a number starting at the current position and advances past it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // wcstod consumed nothing: skip the character so the scan loop
            // always makes progress instead of spinning forever.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul,
                     Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(),
            [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current; // next unread character of the expression
    Tokens m_result;
};

} // namespace Detail

// Entry point of the lexer: returns the tokens found in `expr`, in source order.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data at this stage.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub that exists only so the first test compiles.
// Always throws std::exception; `expr` is not inspected.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// Operators known to the lexer. Each enumerator's value is the wide character
// that spells the operator.
enum class Operator : wchar_t {
    Plus = L'+',
};

// At this stage a token is nothing more than an operator code.
using Token = Operator;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . …
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
… . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
// Operators and parentheses known to the lexer. The underlying type is wchar_t and
// each enumerator's value is the character that spells it.
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators and parentheses recognised by the lexer. The underlying type is
/// wchar_t and each enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character string that spells op.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Discriminates the two kinds of value a Token can hold.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator name of type.
/// @throws std::out_of_range if type is not one of the enumerators.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

/// A lexical token: either an Operator or a number (double), tagged by TokenType.
///
/// Both constructors and both conversion operators are intentionally implicit;
/// the lexer pushes raw doubles/operators into Tokens and the tests compare
/// tokens against plain values.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}

    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const {
        return m_type;
    }

    /// @throws std::logic_error if the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error if the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Formats a token as text: std::to_wstring for numbers, the operator character otherwise.
/// @throws std::out_of_range if the token type is not one of the enumerators.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

/// Single-pass scanner that turns an expression string into Tokens.
///
/// The scanner keeps a raw pointer into the buffer of the string passed to the
/// constructor, so that string must stay alive and unmodified while scanning.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the whole expression, appending one token per number or
    /// operator; any other character (e.g. a space) is skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const {
        return m_result;
    }

private:
    /// True once the cursor is on the terminating null character.
    bool EndOfExpression() const {
        return *m_current == L'\0';
    }

    /// A number starts with a decimal digit.
    bool IsNumber() const {
        return iswdigit(*m_current) != 0;
    }

    /// Parses a number with wcstod and moves the cursor to the first character after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    /// True when the current character spells one of the Operator enumerators.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    /// Emits the current character as an operator token and steps past it.
    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() {
        ++m_current;
    }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits expr into number and operator tokens, ignoring all other characters.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
// Complete test file for the lexer, written against the Visual Studio CppUnitTest framework.
// AssertRange::AreEqual first compares the two ranges' lengths, then compares them element by
// element, naming the first mismatching position. LexerTests exercises Lexer::Tokenize;
// TokenTests exercises Token's type tag and its conversions to Operator and double.
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

/// Stub: unconditionally throws std::exception, whatever expr contains.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
/// Returns an empty token list; expr is not inspected.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// Tokenizes L"+" and expects a list containing exactly one element, Operator::Plus.
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// Operator has wchar_t as its underlying type and Plus has the value of the '+' character.
// At this stage Token is just another name for Operator.
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
/// Returns a one-character string holding token's value converted to wchar_t.
inline std::wstring ToString(const Token &token) {
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
/// Returns no tokens for an empty expression; otherwise a single operator
/// token built from the first character of expr.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    if(!expr.empty()) {
        tokens.push_back(static_cast<Operator>(expr[0]));
    }
    return tokens;
}
. . . β¦
.
// Tokenizes L"1" and expects a list containing exactly one element equal to 1.0.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A Token constructed from a double must report TokenType::Number.
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
/// Token that records only which kind of value it was built from; the value
/// itself is not stored at this stage.
class Token {
public:
    /// Built from an operator: tagged TokenType::Operator.
    Token(Operator)
        : m_type(TokenType::Operator) {
    }

    /// Built from a number: tagged TokenType::Number.
    Token(double)
        : m_type(TokenType::Number) {
    }

    TokenType Type() const {
        return m_type;
    }

private:
    TokenType m_type;
};
β¦ . . . .
.
// A Token built from Operator::Plus must convert back to Operator::Plus
// (AreEqual<Operator> forces the Token-to-Operator conversion).
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A Token built from 1.23 must convert back to the double 1.23
// (AreEqual<double> forces the Token-to-double conversion).
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
// Tokenizes L"12.34" and expects a list containing exactly one element equal to 12.34.
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
/// Produces at most one token from the start of expr: nothing for an empty
/// string, a number parsed by _wtof when the first character is a decimal
/// digit, otherwise the first character cast to Operator.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *current = expr.c_str();
    if(*current == L'\0') {
        return Tokens();
    }
    if(!iswdigit(*current)) {
        return{ static_cast<Operator>(*current) };
    }
    return{ _wtof(current) };
}
. .
// Tokenizes L"+12.34" and expects two tokens in order: Operator::Plus, then the number 12.34.
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
/// Collects at most one token from the start of expr into a result list:
/// a number parsed by _wtof when the first character is a decimal digit,
/// otherwise the first character cast to Operator. An empty expr yields an
/// empty list.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    const wchar_t *current = expr.c_str();
    if(*current != L'\0') {
        if(iswdigit(*current)) {
            tokens.push_back(_wtof(current));
        }
        else {
            tokens.push_back(static_cast<Operator>(*current));
        }
    }
    return tokens;
}
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators and parentheses recognised by the lexer. The underlying type is
/// wchar_t and each enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character string that spells op.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Discriminates the two kinds of value a Token can hold.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator name of type.
/// @throws std::out_of_range if type is not one of the enumerators.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

/// A lexical token: either an Operator or a number (double), tagged by TokenType.
///
/// Both constructors and both conversion operators are intentionally implicit;
/// the lexer pushes raw doubles/operators into Tokens and the tests compare
/// tokens against plain values.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}

    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const {
        return m_type;
    }

    /// @throws std::logic_error if the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error if the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Formats a token as text: std::to_wstring for numbers, the operator character otherwise.
/// @throws std::out_of_range if the token type is not one of the enumerators.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

/// Single-pass scanner that turns an expression string into Tokens.
///
/// The scanner keeps a raw pointer into the buffer of the string passed to the
/// constructor, so that string must stay alive and unmodified while scanning.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the whole expression, appending one token per number or
    /// operator; any other character (e.g. a space) is skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const {
        return m_result;
    }

private:
    /// True once the cursor is on the terminating null character.
    bool EndOfExpression() const {
        return *m_current == L'\0';
    }

    /// A number starts with a decimal digit.
    bool IsNumber() const {
        return iswdigit(*m_current) != 0;
    }

    /// Parses a number with wcstod and moves the cursor to the first character after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    /// True when the current character spells one of the Operator enumerators.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    /// Emits the current character as an operator token and steps past it.
    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() {
        ++m_current;
    }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits expr into number and operator tokens, ignoring all other characters.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

/// Stub: unconditionally throws std::exception, whatever expr contains.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators known to the lexer. Each enumerator's value is the character
// that spells the operator, so a character casts straight to an Operator.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the operator as a one-character string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Discriminator for the two kinds of value a Token can carry.
enum class TokenType {
    Operator,
    Number
};

// Returns the enumerator's name; throws std::out_of_range for any other value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

// A lexical token holding either an Operator or a double.
// Both constructors are intentionally implicit: the tokenizer pushes raw
// doubles and Operators straight into a Tokens vector.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Throws std::logic_error when the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error when the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when both the type and the stored value match.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type != right.m_type)
            return false;
        switch(left.m_type) {
        case TokenType::Operator:
            return left.m_operator == right.m_operator;
        case TokenType::Number:
            return left.m_number == right.m_number;
        default:
            throw std::out_of_range("TokenType");
        }
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is ever read.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token: std::to_wstring output for numbers, the operator
// character for operators. Throws std::out_of_range for an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Walks a null-terminated wide string once, left to right, collecting tokens.
// Only a pointer into the expression is kept, so the string passed to the
// constructor has to outlive the Tokenizer.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Scans up to the terminating L'\0'. A character that starts neither a
    // number nor an operator (e.g. a space) is skipped without a token.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses a number at the cursor and moves the cursor just past it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // Defensive: if wcstod consumed nothing (presumably possible where
            // iswdigit accepts characters wcstod cannot parse), step over the
            // character so that Tokenize() cannot loop forever.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;   // cursor into the caller's expression
    Tokens m_result;            // tokens collected so far
};

} // namespace Detail

// Splits `expr` into number and operator tokens; other characters are ignored.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
// InterpreterTests.cpp
#include "stdafx.h"
#include "CppUnitTest.h"
#include "Interpreter.h"

namespace InterpreterTests {

using namespace Microsoft::VisualStudio::CppUnitTestFramework;
using namespace Interpreter;
using namespace std;

namespace AssertRange {

// Asserts that `actual` holds exactly the elements listed in `expect`, in order.
template<class T, class ActualRange>
static void AreEqual(initializer_list<T> expect, const ActualRange &actual) {
    auto actualIter = begin(actual);
    auto expectIter = begin(expect);
    // Sizes are compared first so that a length mismatch is reported as such.
    Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs.");
    for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) {
        // The message carries the zero-based index of the pair being compared.
        auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter));
        // NOTE(review): the explicit <T> presumably makes both sides compare as T - confirm against CppUnitTest.h.
        Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str());
    }
}

} // namespace AssertRange

// Behaviour of Lexer::Tokenize.
TEST_CLASS(LexerTests) {
public:
    TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) {
        Tokens tokens = Lexer::Tokenize(L"");
        Assert::IsTrue(tokens.empty());
    }
    TEST_METHOD(Should_tokenize_single_plus_operator) {
        Tokens tokens = Lexer::Tokenize(L"+");
        AssertRange::AreEqual({ Operator::Plus }, tokens);
    }
    TEST_METHOD(Should_tokenize_single_digit) {
        Tokens tokens = Lexer::Tokenize(L"1");
        AssertRange::AreEqual({ 1.0 }, tokens);
    }
    TEST_METHOD(Should_tokenize_floating_point_number) {
        Tokens tokens = Lexer::Tokenize(L"12.34");
        AssertRange::AreEqual({ 12.34 }, tokens);
    }
    TEST_METHOD(Should_tokenize_plus_and_number) {
        Tokens tokens = Lexer::Tokenize(L"+12.34");
        AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens);
    }
    // Spaces before, between and after tokens must not yield tokens.
    TEST_METHOD(Should_skip_spaces) {
        Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 ");
        AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens);
    }
    // Covers every operator in a single expression.
    TEST_METHOD(Should_tokenize_complex_experssion) {
        Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)");
        AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4),
            Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens);
    }
};

// Behaviour of the Token value type.
TEST_CLASS(TokenTests) {
public:
    TEST_METHOD(Should_get_type_for_operator_token) {
        Token opToken(Operator::Plus);
        Assert::AreEqual(TokenType::Operator, opToken.Type());
    }
    TEST_METHOD(Should_get_type_for_number_token) {
        Token numToken(1.2);
        Assert::AreEqual(TokenType::Number, numToken.Type());
    }
    TEST_METHOD(Should_get_operator_code_from_operator_token) {
        Token token(Operator::Plus);
        Assert::AreEqual<Operator>(Operator::Plus, token);
    }
    TEST_METHOD(Should_get_number_value_from_number_token) {
        Token token(1.23);
        Assert::AreEqual<double>(1.23, token);
    }
};

}
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

// Initial stub: always throws std::exception and never looks at `expr`.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
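({} → nil) Replace the absence of code with code that uses nil. (nil → constant) Replace nil with a constant. (constant → constant+) Replace a simple constant with a more complex one (a one-character string with a multi-character string, for example). (constant → scalar) Replace a constant with a variable or an argument. (statement → statements) Add unconditional statements (break, continue, return and the like). (unconditional → if) Split the execution path with a conditional. (scalar → array) Replace a variable/argument with an array. (array → container) Replace an array with a more complex container. (statement → recursion) Replace a statement with recursion. (if → while) Replace a conditional with a loop. (expression → function) Replace an expression with a function. (variable → assignment) Change the value of a variable.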
.
// Simplest implementation that passes the first test: the input is ignored
// and an empty token list is returned.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// The only operator so far; its value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+'
};

// For now a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
// Renders a token as a one-character string holding its character value.
inline std::wstring ToString(const Token &token) {
    return std::wstring(1, static_cast<wchar_t>(token));
}
, , . , (unconditional β if).
// Returns no tokens for an empty expression; otherwise a single token made
// from the first character. The rest of the expression is ignored.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    if(!expr.empty()) {
        result.push_back(static_cast<Operator>(expr[0]));
    }
    return result;
}
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; /* ... */ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } /* ... */ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
// Produces at most one token, taken from the start of the expression:
// a number when it begins with a digit, otherwise the first character
// cast to Operator. An empty expression yields no tokens.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *current = expr.c_str();
    if(*current == L'\0') {
        return Tokens();
    }
    const Token first = iswdigit(*current)
        ? Token(_wtof(current))
        : Token(static_cast<Operator>(*current));
    return Tokens(1, first);
}
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
// Same single-token behaviour as before, restructured around a `result`
// list so that the conditional can later become a loop.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    const wchar_t *current = expr.c_str();
    if(*current != L'\0') {
        if(iswdigit(*current))
            result.push_back(_wtof(current));
        else
            result.push_back(static_cast<Operator>(*current));
    }
    return result;
}
: (if β while). , .
// Splits the whole expression into tokens.
// A run starting with a digit is parsed by wcstod into a number token; every
// other character is cast to Operator as-is (no validation at this stage).
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    for(const wchar_t *current = expr.c_str(); *current != L'\0'; ) {
        if(!iswdigit(*current)) {
            result.push_back(static_cast<Operator>(*current));
            ++current;
            continue;
        }
        // wcstod reports where the number ended, so scanning resumes there.
        wchar_t *end = nullptr;
        result.push_back(wcstod(current, &end));
        current = end;
    }
    return result;
}
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
// Public entry point of the lexer: runs a Detail::Tokenizer over `expr`
// and returns a copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    return scanner.Result();
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
// Operators known to the lexer. Each enumerator's value is the character
// that spells the operator in an expression.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')'
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators known to the lexer. Each enumerator's value is the character
// that spells the operator, so a character casts straight to an Operator.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the operator as a one-character string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Discriminator for the two kinds of value a Token can carry.
enum class TokenType {
    Operator,
    Number
};

// Returns the enumerator's name; throws std::out_of_range for any other value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

// A lexical token holding either an Operator or a double.
// Both constructors are intentionally implicit: the tokenizer pushes raw
// doubles and Operators straight into a Tokens vector.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Throws std::logic_error when the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error when the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when both the type and the stored value match.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type != right.m_type)
            return false;
        switch(left.m_type) {
        case TokenType::Operator:
            return left.m_operator == right.m_operator;
        case TokenType::Number:
            return left.m_number == right.m_number;
        default:
            throw std::out_of_range("TokenType");
        }
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is ever read.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token: std::to_wstring output for numbers, the operator
// character for operators. Throws std::out_of_range for an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Walks a null-terminated wide string once, left to right, collecting tokens.
// Only a pointer into the expression is kept, so the string passed to the
// constructor has to outlive the Tokenizer.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Scans up to the terminating L'\0'. A character that starts neither a
    // number nor an operator (e.g. a space) is skipped without a token.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses a number at the cursor and moves the cursor just past it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // Defensive: if wcstod consumed nothing (presumably possible where
            // iswdigit accepts characters wcstod cannot parse), step over the
            // character so that Tokenize() cannot loop forever.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;   // cursor into the caller's expression
    Tokens m_result;            // tokens collected so far
};

} // namespace Detail

// Splits `expr` into number and operator tokens; other characters are ignored.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
// InterpreterTests.cpp
#include "stdafx.h"
#include "CppUnitTest.h"
#include "Interpreter.h"

namespace InterpreterTests {

using namespace Microsoft::VisualStudio::CppUnitTestFramework;
using namespace Interpreter;
using namespace std;

namespace AssertRange {

// Asserts that `actual` holds exactly the elements listed in `expect`, in order.
template<class T, class ActualRange>
static void AreEqual(initializer_list<T> expect, const ActualRange &actual) {
    auto actualIter = begin(actual);
    auto expectIter = begin(expect);
    // Sizes are compared first so that a length mismatch is reported as such.
    Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs.");
    for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) {
        // The message carries the zero-based index of the pair being compared.
        auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter));
        // NOTE(review): the explicit <T> presumably makes both sides compare as T - confirm against CppUnitTest.h.
        Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str());
    }
}

} // namespace AssertRange

// Behaviour of Lexer::Tokenize.
TEST_CLASS(LexerTests) {
public:
    TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) {
        Tokens tokens = Lexer::Tokenize(L"");
        Assert::IsTrue(tokens.empty());
    }
    TEST_METHOD(Should_tokenize_single_plus_operator) {
        Tokens tokens = Lexer::Tokenize(L"+");
        AssertRange::AreEqual({ Operator::Plus }, tokens);
    }
    TEST_METHOD(Should_tokenize_single_digit) {
        Tokens tokens = Lexer::Tokenize(L"1");
        AssertRange::AreEqual({ 1.0 }, tokens);
    }
    TEST_METHOD(Should_tokenize_floating_point_number) {
        Tokens tokens = Lexer::Tokenize(L"12.34");
        AssertRange::AreEqual({ 12.34 }, tokens);
    }
    TEST_METHOD(Should_tokenize_plus_and_number) {
        Tokens tokens = Lexer::Tokenize(L"+12.34");
        AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens);
    }
    // Spaces before, between and after tokens must not yield tokens.
    TEST_METHOD(Should_skip_spaces) {
        Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 ");
        AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens);
    }
    // Covers every operator in a single expression.
    TEST_METHOD(Should_tokenize_complex_experssion) {
        Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)");
        AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4),
            Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens);
    }
};

// Behaviour of the Token value type.
TEST_CLASS(TokenTests) {
public:
    TEST_METHOD(Should_get_type_for_operator_token) {
        Token opToken(Operator::Plus);
        Assert::AreEqual(TokenType::Operator, opToken.Type());
    }
    TEST_METHOD(Should_get_type_for_number_token) {
        Token numToken(1.2);
        Assert::AreEqual(TokenType::Number, numToken.Type());
    }
    TEST_METHOD(Should_get_operator_code_from_operator_token) {
        Token token(Operator::Plus);
        Assert::AreEqual<Operator>(Operator::Plus, token);
    }
    TEST_METHOD(Should_get_number_value_from_number_token) {
        Token token(1.23);
        Assert::AreEqual<double>(1.23, token);
    }
};

}
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

// Initial stub: always throws std::exception and never looks at `expr`.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
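({} → nil) Replace the absence of code with code that uses nil. (nil → constant) Replace nil with a constant. (constant → constant+) Replace a simple constant with a more complex one (a one-character string with a multi-character string, for example). (constant → scalar) Replace a constant with a variable or an argument. (statement → statements) Add unconditional statements (break, continue, return and the like). (unconditional → if) Split the execution path with a conditional. (scalar → array) Replace a variable/argument with an array. (array → container) Replace an array with a more complex container. (statement → recursion) Replace a statement with recursion. (if → while) Replace a conditional with a loop. (expression → function) Replace an expression with a function. (variable → assignment) Change the value of a variable.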
.
// Simplest implementation that passes the first test: the input is ignored
// and an empty token list is returned.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// The only operator so far; its value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+'
};

// For now a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
// Renders a token as a one-character string holding its character value.
inline std::wstring ToString(const Token &token) {
    return std::wstring(1, static_cast<wchar_t>(token));
}
, , . , (unconditional β if).
// Returns no tokens for an empty expression; otherwise a single token made
// from the first character. The rest of the expression is ignored.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    if(!expr.empty()) {
        result.push_back(static_cast<Operator>(expr[0]));
    }
    return result;
}
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; /* ... */ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } /* ... */ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
// Produces at most one token, taken from the start of the expression:
// a number when it begins with a digit, otherwise the first character
// cast to Operator. An empty expression yields no tokens.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *current = expr.c_str();
    if(*current == L'\0') {
        return Tokens();
    }
    const Token first = iswdigit(*current)
        ? Token(_wtof(current))
        : Token(static_cast<Operator>(*current));
    return Tokens(1, first);
}
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
// Same single-token behaviour as before, restructured around a `result`
// list so that the conditional can later become a loop.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    const wchar_t *current = expr.c_str();
    if(*current != L'\0') {
        if(iswdigit(*current))
            result.push_back(_wtof(current));
        else
            result.push_back(static_cast<Operator>(*current));
    }
    return result;
}
: (if β while). , .
// Splits the whole expression into tokens.
// A run starting with a digit is parsed by wcstod into a number token; every
// other character is cast to Operator as-is (no validation at this stage).
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    for(const wchar_t *current = expr.c_str(); *current != L'\0'; ) {
        if(!iswdigit(*current)) {
            result.push_back(static_cast<Operator>(*current));
            ++current;
            continue;
        }
        // wcstod reports where the number ended, so scanning resumes there.
        wchar_t *end = nullptr;
        result.push_back(wcstod(current, &end));
        current = end;
    }
    return result;
}
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
// Public entry point of the lexer: runs a Detail::Tokenizer over `expr`
// and returns a copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    return scanner.Result();
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
// Operators known to the lexer. Each enumerator's value is the character
// that spells the operator in an expression.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')'
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators known to the lexer. Each enumerator's value is the character
// that spells the operator, so a character casts straight to an Operator.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the operator as a one-character string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Discriminator for the two kinds of value a Token can carry.
enum class TokenType {
    Operator,
    Number
};

// Returns the enumerator's name; throws std::out_of_range for any other value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

// A lexical token holding either an Operator or a double.
// Both constructors are intentionally implicit: the tokenizer pushes raw
// doubles and Operators straight into a Tokens vector.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Throws std::logic_error when the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error when the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when both the type and the stored value match.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type != right.m_type)
            return false;
        switch(left.m_type) {
        case TokenType::Operator:
            return left.m_operator == right.m_operator;
        case TokenType::Number:
            return left.m_number == right.m_number;
        default:
            throw std::out_of_range("TokenType");
        }
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is ever read.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token: std::to_wstring output for numbers, the operator
// character for operators. Throws std::out_of_range for an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Walks a null-terminated wide string once, left to right, collecting tokens.
// Only a pointer into the expression is kept, so the string passed to the
// constructor has to outlive the Tokenizer.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Scans up to the terminating L'\0'. A character that starts neither a
    // number nor an operator (e.g. a space) is skipped without a token.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses a number at the cursor and moves the cursor just past it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // Defensive: if wcstod consumed nothing (presumably possible where
            // iswdigit accepts characters wcstod cannot parse), step over the
            // character so that Tokenize() cannot loop forever.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;   // cursor into the caller's expression
    Tokens m_result;            // tokens collected so far
};

} // namespace Detail

// Splits `expr` into number and operator tokens; other characters are ignored.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
// InterpreterTests.cpp
#include "stdafx.h"
#include "CppUnitTest.h"
#include "Interpreter.h"

namespace InterpreterTests {

using namespace Microsoft::VisualStudio::CppUnitTestFramework;
using namespace Interpreter;
using namespace std;

namespace AssertRange {

// Asserts that `actual` holds exactly the elements listed in `expect`, in order.
template<class T, class ActualRange>
static void AreEqual(initializer_list<T> expect, const ActualRange &actual) {
    auto actualIter = begin(actual);
    auto expectIter = begin(expect);
    // Sizes are compared first so that a length mismatch is reported as such.
    Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs.");
    for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) {
        // The message carries the zero-based index of the pair being compared.
        auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter));
        // NOTE(review): the explicit <T> presumably makes both sides compare as T - confirm against CppUnitTest.h.
        Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str());
    }
}

} // namespace AssertRange

// Behaviour of Lexer::Tokenize.
TEST_CLASS(LexerTests) {
public:
    TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) {
        Tokens tokens = Lexer::Tokenize(L"");
        Assert::IsTrue(tokens.empty());
    }
    TEST_METHOD(Should_tokenize_single_plus_operator) {
        Tokens tokens = Lexer::Tokenize(L"+");
        AssertRange::AreEqual({ Operator::Plus }, tokens);
    }
    TEST_METHOD(Should_tokenize_single_digit) {
        Tokens tokens = Lexer::Tokenize(L"1");
        AssertRange::AreEqual({ 1.0 }, tokens);
    }
    TEST_METHOD(Should_tokenize_floating_point_number) {
        Tokens tokens = Lexer::Tokenize(L"12.34");
        AssertRange::AreEqual({ 12.34 }, tokens);
    }
    TEST_METHOD(Should_tokenize_plus_and_number) {
        Tokens tokens = Lexer::Tokenize(L"+12.34");
        AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens);
    }
    // Spaces before, between and after tokens must not yield tokens.
    TEST_METHOD(Should_skip_spaces) {
        Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 ");
        AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens);
    }
    // Covers every operator in a single expression.
    TEST_METHOD(Should_tokenize_complex_experssion) {
        Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)");
        AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4),
            Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens);
    }
};

// Behaviour of the Token value type.
TEST_CLASS(TokenTests) {
public:
    TEST_METHOD(Should_get_type_for_operator_token) {
        Token opToken(Operator::Plus);
        Assert::AreEqual(TokenType::Operator, opToken.Type());
    }
    TEST_METHOD(Should_get_type_for_number_token) {
        Token numToken(1.2);
        Assert::AreEqual(TokenType::Number, numToken.Type());
    }
    TEST_METHOD(Should_get_operator_code_from_operator_token) {
        Token token(Operator::Plus);
        Assert::AreEqual<Operator>(Operator::Plus, token);
    }
    TEST_METHOD(Should_get_number_value_from_number_token) {
        Token token(1.23);
        Assert::AreEqual<double>(1.23, token);
    }
};

}
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data at this step.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub lexer: ignores expr and always throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
// Minimal implementation: every expression yields an empty token list.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone "+" must produce exactly one Plus operator token.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    const auto tokens = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, tokens);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// Operators known to the lexer. Each enumerator's underlying value is the
// wide character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
};

// At this step a token is simply an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
// Renders a token as a one-character wide string holding its wchar_t value.
inline std::wstring ToString(const Token &token) {
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
// Empty input gives no tokens; otherwise the first character is returned
// as a single operator token.
inline Tokens Tokenize(std::wstring expr) {
    return expr.empty()
        ? Tokens()
        : Tokens{ static_cast<Operator>(expr[0]) };
}
. . . β¦
.
// A single digit must produce exactly one number token with that value.
TEST_METHOD(Should_tokenize_single_digit) {
    const auto tokens = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, tokens);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token constructed from a double must report the Number type.
TEST_METHOD(Should_get_type_for_number_token) {
    const Token numToken(1.2);
    Assert::AreEqual(TokenType::Number, numToken.Type());
}
. (constant → scalar) .
// Token that remembers which kind of value it was constructed from.
// The value itself is not stored at this step.
class Token {
public:
    // Operator argument: the token becomes an Operator token.
    Token(Operator)
        : m_type(TokenType::Operator) {
    }

    // Numeric argument: the token becomes a Number token.
    Token(double)
        : m_type(TokenType::Number) {
    }

    TokenType Type() const {
        return m_type;
    }

private:
    TokenType m_type;
};
β¦ . . . .
.
// An operator token must convert back to the operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    const Token token(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, token);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token must convert back to the value it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) {
    const Token token(1.23);
    Assert::AreEqual<double>(1.23, token);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
// A multi-digit number with a fractional part must produce one number token.
TEST_METHOD(Should_tokenize_floating_point_number) {
    const auto tokens = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, tokens);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
// Produces at most one token from the start of expr: nothing for an empty
// string, a number parsed by _wtof when the first character is a digit,
// otherwise the first character as an operator.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *current = expr.c_str();
    if(*current == L'\0') {
        return Tokens();
    }
    if(iswdigit(*current)) {
        return Tokens{ Token(_wtof(current)) };
    }
    return Tokens{ Token(static_cast<Operator>(*current)) };
}
. .
// An operator directly followed by a number must produce two tokens, in order.
TEST_METHOD(Should_tokenize_plus_and_number) {
    const auto tokens = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens);
}
, . , . . result
, .
// Same single-token behaviour as before, but the token is now collected
// into a result vector instead of being returned directly.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    const wchar_t *current = expr.c_str();
    if(*current != L'\0') {
        if(iswdigit(*current)) {
            result.push_back(_wtof(current));
        }
        else {
            result.push_back(static_cast<Operator>(*current));
        }
    }
    return result;
}
: (if → while). , .
// Scans the whole expression. A run starting with a digit is parsed by
// wcstod into a number token; every other character becomes an operator token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    for(const wchar_t *current = expr.c_str(); *current != L'\0'; ) {
        if(!iswdigit(*current)) {
            result.push_back(static_cast<Operator>(*current));
            ++current;
            continue;
        }
        // wcstod sets `end` to the first character after the parsed number.
        wchar_t *end = nullptr;
        const double number = wcstod(current, &end);
        result.push_back(number);
        current = end;
    }
    return result;
}
wcstod
, _wtof
, . , . , .
. .
.
// Spaces around and between tokens must not appear in the result.
TEST_METHOD(Should_skip_spaces) {
    const auto tokens = Lexer::Tokenize(L" 1 + 12.34 ");
    AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens);
}
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
// Entry point of the lexer: runs a Detail::Tokenizer over expr and returns
// a copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    const Tokens &collected = scanner.Result();
    return collected;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// Every supported operator and parenthesis must be tokenized, in source order.
TEST_METHOD(Should_tokenize_complex_experssion) {
    const auto tokens = Lexer::Tokenize(L"1+2*3/(4-5)");
    AssertRange::AreEqual({
        Token(1), Token(Operator::Plus),
        Token(2), Token(Operator::Mul),
        Token(3), Token(Operator::Div),
        Token(Operator::LParen),
        Token(4), Token(Operator::Minus), Token(5),
        Token(Operator::RParen)
    }, tokens);
}
Operator
, .
// Operators known to the lexer. Each enumerator's underlying value is the
// wide character that spells it in an expression.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators known to the lexer. Each enumerator's underlying value is the
// wide character that spells it in an expression.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// One-character wide string holding the operator's symbol.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Kinds of token the lexer can produce.
enum class TokenType {
    Operator,
    Number
};

// Name of the token type. Throws std::out_of_range for a value that is
// not one of the enumerators.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// A lexer token: either an operator or a number. m_type records which
// member of the anonymous union was initialised by the constructor.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Returns the stored operator.
    // Throws std::logic_error if this is not an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number.
    // Throws std::logic_error if this is not a number token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same stored
    // value; numbers are compared exactly with ==.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Wide-string form of a token: the number via std::to_wstring, or the
// operator's symbol. Throws std::out_of_range for an unknown token type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

// Walks a wide-character expression and collects its tokens.
// Only a pointer into expr's buffer is kept, so expr must stay alive and
// unmodified while Tokenize() runs.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the expression up to its terminating L'\0'. Characters that
    // start neither a number nor an operator are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses a number with wcstod and moves to the first character after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    // True when the current character is the symbol of a known operator.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits expr into number and operator tokens, in source order.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data at this step.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub lexer: ignores expr and always throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators known to the lexer. Each enumerator's underlying value is the
// wide character that spells it in an expression.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// One-character wide string holding the operator's symbol.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Kinds of token the lexer can produce.
enum class TokenType {
    Operator,
    Number
};

// Name of the token type. Throws std::out_of_range for a value that is
// not one of the enumerators.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// A lexer token: either an operator or a number. m_type records which
// member of the anonymous union was initialised by the constructor.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Returns the stored operator.
    // Throws std::logic_error if this is not an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number.
    // Throws std::logic_error if this is not a number token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same stored
    // value; numbers are compared exactly with ==.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Wide-string form of a token: the number via std::to_wstring, or the
// operator's symbol. Throws std::out_of_range for an unknown token type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

// Walks a wide-character expression and collects its tokens.
// Only a pointer into expr's buffer is kept, so expr must stay alive and
// unmodified while Tokenize() runs.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the expression up to its terminating L'\0'. Characters that
    // start neither a number nor an operator are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses a number with wcstod and moves to the first character after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    // True when the current character is the symbol of a known operator.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits expr into number and operator tokens, in source order.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data at this step.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub lexer: ignores expr and always throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <wchar.h>   // wcstod
#include <wctype.h>  // iswdigit

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>

namespace Interpreter {

// Operators and parentheses known to the lexer. Each enumerator's value is
// the character that denotes it in an expression.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the one-character string for `op`.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Discriminates what a Token holds.
enum class TokenType {
    Operator,
    Number
};

// Returns the name of `type`. Throws std::out_of_range for a value that is
// not a declared enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

// A lexical token: either an Operator or a floating-point number.
class Token {
public:
    // Both constructors are deliberately implicit: the tokenizer pushes raw
    // Operator and double values straight into a Tokens vector.
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const {
        return m_type;
    }

    // Throws std::logic_error when the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error when the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token, chosen by its type. Throws std::out_of_range for an
// unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Walks a wide-character expression and collects its tokens.
// Only a pointer into the string given to the constructor is kept, so that
// string has to stay alive while the tokenizer is in use.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Scans to the terminating null. Characters that start neither a number
    // nor an operator (spaces, for instance) are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const {
        return m_result;
    }

private:
    bool EndOfExpression() const {
        return *m_current == L'\0';
    }

    bool IsNumber() const {
        return iswdigit(*m_current) != 0;
    }

    // Parses the number at the cursor; wcstod reports where it ends and the
    // cursor is moved there.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() {
        ++m_current;
    }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits `expr` into number and operator tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub: ignores `expr` and unconditionally throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
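({} → nil) Replace the absence of code with code that uses a nil value. (nil → constant) Replace nil with a constant. (constant → constant+) Replace a simple constant with a more complex one (for example, a one-character string with a longer string). (constant → scalar) Replace a constant with a variable, or an argument. (statement → statements) Add more unconditional statements (break, continue, return and the like). (unconditional → if) Split the execution path with a conditional. (scalar → array) Replace a variable/argument with an array. (array → container) Replace an array with a more complex container. (statement → recursion) Replace a statement with recursion. (if → while) Replace a conditional with a loop. (expression → function) Replace an expression with a function. (variable → assignment) Replace the value of a variable.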
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
// Splits `expr` into tokens. A run starting with a digit is parsed as a
// number by wcstod; every other character is cast to Operator as-is.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    for(const wchar_t *cursor = expr.c_str(); *cursor != L'\0'; ) {
        if(!iswdigit(*cursor)) {
            tokens.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        // wcstod reports where the number ends; resume scanning from there.
        wchar_t *numberEnd = nullptr;
        tokens.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
// Entry point of the lexer: runs a Detail::Tokenizer over `expr` and returns
// a copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    const Tokens &collected = scanner.Result();
    return collected;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <wchar.h>   // wcstod
#include <wctype.h>  // iswdigit

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>

namespace Interpreter {

// Operators and parentheses known to the lexer. Each enumerator's value is
// the character that denotes it in an expression.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the one-character string for `op`.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Discriminates what a Token holds.
enum class TokenType {
    Operator,
    Number
};

// Returns the name of `type`. Throws std::out_of_range for a value that is
// not a declared enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

// A lexical token: either an Operator or a floating-point number.
class Token {
public:
    // Both constructors are deliberately implicit: the tokenizer pushes raw
    // Operator and double values straight into a Tokens vector.
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const {
        return m_type;
    }

    // Throws std::logic_error when the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error when the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token, chosen by its type. Throws std::out_of_range for an
// unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Walks a wide-character expression and collects its tokens.
// Only a pointer into the string given to the constructor is kept, so that
// string has to stay alive while the tokenizer is in use.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Scans to the terminating null. Characters that start neither a number
    // nor an operator (spaces, for instance) are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const {
        return m_result;
    }

private:
    bool EndOfExpression() const {
        return *m_current == L'\0';
    }

    bool IsNumber() const {
        return iswdigit(*m_current) != 0;
    }

    // Parses the number at the cursor; wcstod reports where it ends and the
    // cursor is moved there.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() {
        ++m_current;
    }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits `expr` into number and operator tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub: ignores `expr` and unconditionally throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
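({} → nil) Replace the absence of code with code that uses a nil value. (nil → constant) Replace nil with a constant. (constant → constant+) Replace a simple constant with a more complex one (for example, a one-character string with a longer string). (constant → scalar) Replace a constant with a variable, or an argument. (statement → statements) Add more unconditional statements (break, continue, return and the like). (unconditional → if) Split the execution path with a conditional. (scalar → array) Replace a variable/argument with an array. (array → container) Replace an array with a more complex container. (statement → recursion) Replace a statement with recursion. (if → while) Replace a conditional with a loop. (expression → function) Replace an expression with a function. (variable → assignment) Replace the value of a variable.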
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
// Splits `expr` into tokens. A run starting with a digit is parsed as a
// number by wcstod; every other character is cast to Operator as-is.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    for(const wchar_t *cursor = expr.c_str(); *cursor != L'\0'; ) {
        if(!iswdigit(*cursor)) {
            tokens.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        // wcstod reports where the number ends; resume scanning from there.
        wchar_t *numberEnd = nullptr;
        tokens.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
// Entry point of the lexer: runs a Detail::Tokenizer over `expr` and returns
// a copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    const Tokens &collected = scanner.Result();
    return collected;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <wchar.h>

#include <algorithm>
#include <cwctype>
#include <stdexcept>
#include <string>
#include <vector>

namespace Interpreter {

// Operators known to the lexer. The value of each enumerator is the wide
// character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Renders an operator as the single character it stands for.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

enum class TokenType {
    Operator,
    Number
};

// Returns the enumerator name. Throws std::out_of_range for any other value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

// A lexer token holding either an operator or a number. m_type records which
// member of the anonymous union is in use.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Throws std::logic_error when the token is not an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error when the token is not a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Formats a token through std::to_wstring (numbers) or ToString(Operator).
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Single-pass scanner. It keeps a raw pointer into the buffer of the string
// given to the constructor, so that string must outlive the tokenizer.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Walks to the terminating L'\0'. Characters that start neither a number
    // nor an operator are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return std::iswdigit(*m_current) != 0; }

    // Lets wcstod parse the number and jumps to the first character after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current; // current scan position
    Tokens m_result;          // tokens collected so far
};

} // namespace Detail

// Splits `expr` into number and operator tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data yet.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub that only needs to compile: it always throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
// Minimal implementation: every input produces an empty token list.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone "+" must produce exactly one Operator::Plus token.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// The only operator so far; its value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
};

// For now a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
// Converts a token to a one-character string holding its wchar_t value.
inline std::wstring ToString(const Token &token) {
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
// Empty input gives no tokens; otherwise the first character is returned as
// the only operator token.
inline Tokens Tokenize(std::wstring expr) {
    if(!expr.empty()) {
        return{ static_cast<Operator>(expr[0]) };
    }
    return{};
}
. . . …
.
// A single digit must come back as one number token.
TEST_METHOD(Should_tokenize_single_digit) {
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double must report TokenType::Number.
TEST_METHOD(Should_get_type_for_number_token) {
    Token numberToken(1.2);
    Assert::AreEqual(TokenType::Number, numberToken.Type());
}
. (constant → scalar) .
// Token that records only its kind: the constructor overload used decides
// the type, and the constructor argument itself is not stored.
class Token {
public:
    Token(Operator) : m_type(TokenType::Operator) {}
    Token(double) : m_type(TokenType::Number) {}

    TokenType Type() const { return m_type; }

private:
    TokenType m_type;
};
… . . . .
.
// An operator token must convert back to the Operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    Token plusToken(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plusToken);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token must convert back to the double it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) {
    Token numberToken(1.23);
    Assert::AreEqual<double>(1.23, numberToken);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
// A multi-digit number with a fractional part must come back as one token.
TEST_METHOD(Should_tokenize_floating_point_number) {
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
// Returns at most one token: nothing for empty input, a number when the
// input starts with a digit, otherwise the first character as an operator.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *current = expr.c_str();
    if(!*current) return{};
    // wcstod is the standard C function; _wtof exists only in the MSVC runtime.
    if(iswdigit(*current)) return{ wcstod(current, nullptr) };
    return{ static_cast<Operator>(*current) };
}
. .
// An operator directly followed by a number must give two tokens.
TEST_METHOD(Should_tokenize_plus_and_number) {
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
// Same single-token behaviour as before, restructured around a `result`
// container so the next step can turn the `if` into a loop.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    const wchar_t *current = expr.c_str();
    if(!*current) return result;
    if(iswdigit(*current)) {
        // wcstod is the standard C function; _wtof exists only in the MSVC runtime.
        result.push_back(wcstod(current, nullptr));
    }
    else {
        result.push_back(static_cast<Operator>(*current));
    }
    return result;
}
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <wchar.h>

#include <algorithm>
#include <cwctype>
#include <stdexcept>
#include <string>
#include <vector>

namespace Interpreter {

// Operators known to the lexer. The value of each enumerator is the wide
// character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Renders an operator as the single character it stands for.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

enum class TokenType {
    Operator,
    Number
};

// Returns the enumerator name. Throws std::out_of_range for any other value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

// A lexer token holding either an operator or a number. m_type records which
// member of the anonymous union is in use.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Throws std::logic_error when the token is not an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error when the token is not a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Formats a token through std::to_wstring (numbers) or ToString(Operator).
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Single-pass scanner. It keeps a raw pointer into the buffer of the string
// given to the constructor, so that string must outlive the tokenizer.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Walks to the terminating L'\0'. Characters that start neither a number
    // nor an operator are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return std::iswdigit(*m_current) != 0; }

    // Lets wcstod parse the number and jumps to the first character after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current; // current scan position
    Tokens m_result;          // tokens collected so far
};

} // namespace Detail

// Splits `expr` into number and operator tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data yet.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub that only needs to compile: it always throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} β nil) , . (nil β constant) . (constant β constant+) ( , ). (constant β scalar) , . (statement β statements) (break, continue, return ). (unconditional β if) . (scalar β array) / . (array β container) . (statement β recursion) . (if β while) . (expression β function) . (variable β assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
// Returns at most one token: nothing for empty input, a number when the
// input starts with a digit, otherwise the first character as an operator.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *current = expr.c_str();
    if(!*current) return{};
    // wcstod is the standard C function; _wtof exists only in the MSVC runtime.
    if(iswdigit(*current)) return{ wcstod(current, nullptr) };
    return{ static_cast<Operator>(*current) };
}
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
// Same single-token behaviour as before, restructured around a `result`
// container so the next step can turn the `if` into a loop.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    const wchar_t *current = expr.c_str();
    if(!*current) return result;
    if(iswdigit(*current)) {
        // wcstod is the standard C function; _wtof exists only in the MSVC runtime.
        result.push_back(wcstod(current, nullptr));
    }
    else {
        result.push_back(static_cast<Operator>(*current));
    }
    return result;
}
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <wchar.h>

#include <algorithm>
#include <cwctype>
#include <stdexcept>
#include <string>
#include <vector>

namespace Interpreter {

// Operators known to the lexer. The value of each enumerator is the wide
// character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Renders an operator as the single character it stands for.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

enum class TokenType {
    Operator,
    Number
};

// Returns the enumerator name. Throws std::out_of_range for any other value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

// A lexer token holding either an operator or a number. m_type records which
// member of the anonymous union is in use.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Throws std::logic_error when the token is not an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error when the token is not a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Formats a token through std::to_wstring (numbers) or ToString(Operator).
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Single-pass scanner. It keeps a raw pointer into the buffer of the string
// given to the constructor, so that string must outlive the tokenizer.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Walks to the terminating L'\0'. Characters that start neither a number
    // nor an operator are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return std::iswdigit(*m_current) != 0; }

    // Lets wcstod parse the number and jumps to the first character after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current; // current scan position
    Tokens m_result;          // tokens collected so far
};

} // namespace Detail

// Splits `expr` into number and operator tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
// First skeleton of the lexer header: just enough declarations for a test to
// compile against. No tokenization is implemented at this stage.
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; carries no data yet.
struct Token {};

/// Sequence of tokens returned by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Stub tokenizer: unconditionally throws std::exception.
/// @param expr  Expression text (ignored by this stub).
/// @throws std::exception  Always.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
/// Tokenizes @p expr. At this step every input produces an empty token list.
inline Tokens Tokenize(std::string expr)
{
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone "+" must come back as exactly one Plus operator token.
TEST_METHOD(Should_tokenize_single_plus_operator)
{
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
/// Operators known to the lexer. Each enumerator's value is the operator's
/// own character, so a character can be cast straight to an Operator.
enum class Operator : wchar_t
{
    Plus = L'+'
};

/// At this step a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
/// Renders @p token as a one-character wide string holding the token's
/// underlying character value.
inline std::wstring ToString(const Token &token) {
    return std::wstring(1, static_cast<wchar_t>(token));
}
, , . , (unconditional → if).
/// Tokenizes @p expr: an empty string gives no tokens, otherwise the first
/// character is returned as a single operator token.
inline Tokens Tokenize(std::wstring expr)
{
    Tokens result;
    if(!expr.empty())
    {
        result.push_back(static_cast<Operator>(expr[0]));
    }
    return result;
}
. . . β¦
.
// A single digit must be tokenized as one number token with that value.
TEST_METHOD(Should_tokenize_single_digit)
{
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double reports the Number type.
TEST_METHOD(Should_get_type_for_number_token)
{
    const Token number(1.2);
    Assert::AreEqual(TokenType::Number, number.Type());
}
. (constant → scalar) .
/// Token that remembers which kind it was built as; the operator or number
/// value itself is not stored yet.
class Token
{
public:
    /// Builds an operator token (the operator value is discarded).
    Token(Operator)
        : m_type(TokenType::Operator)
    {
    }

    /// Builds a number token (the numeric value is discarded).
    Token(double)
        : m_type(TokenType::Number)
    {
    }

    /// Kind this token was constructed with.
    TokenType Type() const
    {
        return m_type;
    }

private:
    TokenType m_type;
};
β¦ . . . .
.
// An operator token converts back to the operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token)
{
    const Token plusToken(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plusToken);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token converts back to the double it was built from.
TEST_METHOD(Should_get_number_value_from_number_token)
{
    const Token numberToken(1.23);
    Assert::AreEqual<double>(1.23, numberToken);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
// Multi-digit input with a decimal point must yield one number token.
TEST_METHOD(Should_tokenize_floating_point_number)
{
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
/// Tokenizes @p expr into at most one token: nothing for an empty string, a
/// number parsed by _wtof when the text starts with a digit, otherwise the
/// first character as an operator.
inline Tokens Tokenize(std::wstring expr)
{
    const wchar_t *const cursor = expr.c_str();
    if(*cursor == L'\0')
        return Tokens();
    if(iswdigit(*cursor) != 0)
        return Tokens{ _wtof(cursor) };
    return Tokens{ static_cast<Operator>(*cursor) };
}
. .
// An operator directly followed by a number must yield two tokens in order.
TEST_METHOD(Should_tokenize_plus_and_number)
{
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
/// Same single-token behavior as the previous step, but the token is now
/// collected in a local result vector instead of being returned directly.
inline Tokens Tokenize(std::wstring expr)
{
    Tokens result;
    const wchar_t *current = expr.c_str();
    if(*current != L'\0')
    {
        if(iswdigit(*current))
            result.push_back(_wtof(current));
        else
            result.push_back(static_cast<Operator>(*current));
    }
    return result;
}
: (if → while). , .
/// Tokenizes the whole of @p expr: digit runs are parsed with wcstod, and
/// every other character is emitted as an operator token.
inline Tokens Tokenize(std::wstring expr)
{
    Tokens result;
    for(const wchar_t *cursor = expr.c_str(); *cursor != L'\0'; )
    {
        if(!iswdigit(*cursor))
        {
            result.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }

        // wcstod reports where the number ended, so scanning resumes there.
        wchar_t *numberEnd = nullptr;
        result.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;
    }
    return result;
}
wcstod
, _wtof
, . , . , .
. .
.
// Whitespace around and between tokens must not produce tokens of its own.
TEST_METHOD(Should_skip_spaces)
{
    const Tokens actual = Lexer::Tokenize(L" 1 + 12.34 ");
    AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, actual);
}
(unconditional → if) , .
// Scan loop: numbers via wcstod, '+' as an operator, everything else skipped.
while(*current) {
    const wchar_t symbol = *current;
    if(iswdigit(symbol)) {
        wchar_t *numberEnd = nullptr;
        result.push_back(wcstod(current, &numberEnd));
        current = numberEnd;
        continue;
    }
    if(symbol == static_cast<wchar_t>(Operator::Plus)) {
        result.push_back(static_cast<Operator>(symbol));
    }
    // Both the operator and any unrecognised character consume one position.
    ++current;
}
. . Detail
. Tokenize
.
/// Tokenizes @p expr by running a Detail::Tokenizer over it and returning a
/// copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr)
{
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    Tokens tokens = scanner.Result();
    return tokens;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// Every supported operator and both parentheses, mixed with numbers.
TEST_METHOD(Should_tokenize_complex_experssion)
{
    const Tokens actual = Lexer::Tokenize(L"1+2*3/(4-5)");
    AssertRange::AreEqual(
        {
            Token(1), Token(Operator::Plus),
            Token(2), Token(Operator::Mul),
            Token(3), Token(Operator::Div),
            Token(Operator::LParen),
            Token(4), Token(Operator::Minus), Token(5),
            Token(Operator::RParen)
        },
        actual);
}
Operator
, .
/// Operators and parentheses understood by the lexer. Each enumerator's value
/// is the character that spells it.
enum class Operator : wchar_t
{
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')'
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
// Lexer for arithmetic expressions: turns text such as L"1+2*3/(4-5)" into a
// flat list of operator and number tokens.
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators and parentheses understood by the lexer. Each enumerator's value
/// is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the operator's character as a one-character string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Discriminates what kind of value a Token holds.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator's name.
/// @throws std::out_of_range for a value that is not a declared enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

/// Tagged value produced by the lexer: holds either an operator or a number,
/// with m_type recording which one. The constructors are intentionally
/// implicit so operators and doubles can be pushed straight into a Tokens list.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    /// Kind of value stored in this token.
    TokenType Type() const { return m_type; }

    /// Returns the stored operator.
    /// @throws std::logic_error if this is not an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// Returns the stored number.
    /// @throws std::logic_error if this is not a number token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same stored
    /// value; numbers are compared exactly with ==.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Each constructor initializes exactly one of these; m_type says which.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Renders a token as text: numbers via std::to_wstring, operators as their
/// character.
/// @throws std::out_of_range if the token reports an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

/// Single-pass scanner that walks a wide string and collects tokens.
///
/// The scanner keeps a raw pointer into the string passed to the constructor,
/// so that string must stay alive and unmodified while Tokenize() runs.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Scans to the terminating L'\0'. Numbers and operators become tokens;
    /// any other character (e.g. a space) is skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // wcstod reports where the number ended; scanning resumes from there.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul,
                     Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(),
                           [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits @p expr into number and operator tokens, skipping every other
/// character.
/// @return The tokens in the order they appear in @p expr.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
// First skeleton of the lexer header: just enough declarations for a test to
// compile against. No tokenization is implemented at this stage.
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; carries no data yet.
struct Token {};

/// Sequence of tokens returned by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Stub tokenizer: unconditionally throws std::exception.
/// @param expr  Expression text (ignored by this stub).
/// @throws std::exception  Always.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} β nil) , . (nil β constant) . (constant β constant+) ( , ). (constant β scalar) , . (statement β statements) (break, continue, return ). (unconditional β if) . (scalar β array) / . (array β container) . (statement β recursion) . (if β while) . (expression β function) . (variable β assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
// Lexer for arithmetic expressions: turns text such as L"1+2*3/(4-5)" into a
// flat list of operator and number tokens.
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators and parentheses understood by the lexer. Each enumerator's value
/// is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the operator's character as a one-character string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Discriminates what kind of value a Token holds.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator's name.
/// @throws std::out_of_range for a value that is not a declared enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

/// Tagged value produced by the lexer: holds either an operator or a number,
/// with m_type recording which one. The constructors are intentionally
/// implicit so operators and doubles can be pushed straight into a Tokens list.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    /// Kind of value stored in this token.
    TokenType Type() const { return m_type; }

    /// Returns the stored operator.
    /// @throws std::logic_error if this is not an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// Returns the stored number.
    /// @throws std::logic_error if this is not a number token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same stored
    /// value; numbers are compared exactly with ==.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Each constructor initializes exactly one of these; m_type says which.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Renders a token as text: numbers via std::to_wstring, operators as their
/// character.
/// @throws std::out_of_range if the token reports an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

/// Single-pass scanner that walks a wide string and collects tokens.
///
/// The scanner keeps a raw pointer into the string passed to the constructor,
/// so that string must stay alive and unmodified while Tokenize() runs.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Scans to the terminating L'\0'. Numbers and operators become tokens;
    /// any other character (e.g. a space) is skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // wcstod reports where the number ended; scanning resumes from there.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul,
                     Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(),
                           [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits @p expr into number and operator tokens, skipping every other
/// character.
/// @return The tokens in the order they appear in @p expr.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
// First skeleton of the lexer header: just enough declarations for a test to
// compile against. No tokenization is implemented at this stage.
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; carries no data yet.
struct Token {};

/// Sequence of tokens returned by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Stub tokenizer: unconditionally throws std::exception.
/// @param expr  Expression text (ignored by this stub).
/// @throws std::exception  Always.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} β nil) , . (nil β constant) . (constant β constant+) ( , ). (constant β scalar) , . (statement β statements) (break, continue, return ). (unconditional β if) . (scalar β array) / . (array β container) . (statement β recursion) . (if β while) . (expression β function) . (variable β assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators understood by the lexer. Each enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character string that spells `op`.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Discriminates the two kinds of value a Token can hold.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator name of `type`.
/// @throws std::out_of_range if `type` is not one of the enumerators.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

/// A lexical token: either an Operator or a floating-point number.
class Token {
public:
    // The constructors are deliberately not explicit: the tokenizer pushes raw
    // operators and doubles into a Tokens vector and relies on the conversion.
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    /// Tells which of the two payloads this token holds.
    TokenType Type() const { return m_type; }

    /// @throws std::logic_error if the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error if the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same payload;
    /// numbers are compared with ==, without any tolerance.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is written by the constructors and read afterwards.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Renders a token as text: numbers through std::to_wstring, operators as their character.
/// @throws std::out_of_range if the token reports an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

/// Walks a wide-character expression from left to right and collects the tokens found.
class Tokenizer {
public:
    /// Starts scanning at the first character of `expr`. Only a pointer into the
    /// string's buffer is kept, so `expr` has to stay alive while Tokenize() runs.
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the rest of the expression: a digit starts a number, an operator
    /// character yields an operator token, and any other character is skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    void ScanNumber() {
        // wcstod reports where the number ended; scanning resumes from there.
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits `expr` into number and operator tokens; characters that are neither are ignored.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type with no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

/// Not implemented yet: ignores `expr` and always throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . …
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
… . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
/// Splits `expr` into tokens. A run starting with a digit is parsed as a number via
/// wcstod; every other character is stored as an Operator with that character code.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    for(const wchar_t *cursor = expr.c_str(); *cursor != L'\0';) {
        if(!iswdigit(*cursor)) {
            // Not a digit: take the single character as an operator and step over it.
            tokens.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        // wcstod reports where the number ended, so scanning resumes right after it.
        wchar_t *afterNumber = nullptr;
        tokens.push_back(wcstod(cursor, &afterNumber));
        cursor = afterNumber;
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators understood by the lexer. Each enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character string that spells `op`.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Discriminates the two kinds of value a Token can hold.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator name of `type`.
/// @throws std::out_of_range if `type` is not one of the enumerators.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

/// A lexical token: either an Operator or a floating-point number.
class Token {
public:
    // The constructors are deliberately not explicit: the tokenizer pushes raw
    // operators and doubles into a Tokens vector and relies on the conversion.
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    /// Tells which of the two payloads this token holds.
    TokenType Type() const { return m_type; }

    /// @throws std::logic_error if the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error if the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same payload;
    /// numbers are compared with ==, without any tolerance.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is written by the constructors and read afterwards.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Renders a token as text: numbers through std::to_wstring, operators as their character.
/// @throws std::out_of_range if the token reports an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

/// Walks a wide-character expression from left to right and collects the tokens found.
class Tokenizer {
public:
    /// Starts scanning at the first character of `expr`. Only a pointer into the
    /// string's buffer is kept, so `expr` has to stay alive while Tokenize() runs.
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the rest of the expression: a digit starts a number, an operator
    /// character yields an operator token, and any other character is skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    void ScanNumber() {
        // wcstod reports where the number ended; scanning resumes from there.
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits `expr` into number and operator tokens; characters that are neither are ignored.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type with no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

/// Not implemented yet: ignores `expr` and always throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . …
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
… . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
/// Splits `expr` into tokens. A run starting with a digit is parsed as a number via
/// wcstod; every other character is stored as an Operator with that character code.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    for(const wchar_t *cursor = expr.c_str(); *cursor != L'\0';) {
        if(!iswdigit(*cursor)) {
            // Not a digit: take the single character as an operator and step over it.
            tokens.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        // wcstod reports where the number ended, so scanning resumes right after it.
        wchar_t *afterNumber = nullptr;
        tokens.push_back(wcstod(cursor, &afterNumber));
        cursor = afterNumber;
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <vector>
#include <wchar.h>
#include <wctype.h>
#include <algorithm>
#include <stdexcept>
#include <string>

namespace Interpreter {

/// Operators and parentheses known to the lexer.
/// The value of each enumerator is the wide character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character spelling of op.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Kinds of token the lexer can produce.
enum class TokenType {
    Operator,
    Number
};

/// Returns the name of a token type.
/// @throws std::out_of_range for a value that is not a TokenType enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

/// A lexer token: either an operator code or a number.
/// Both constructors are intentionally implicit so that an Operator or a
/// double can be used wherever a Token is expected.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// @throws std::logic_error if this token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error if this token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same payload.
    /// Numbers are compared exactly.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type holds a meaningful value.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Renders a token: a number via std::to_wstring, an operator via its character.
/// @throws std::out_of_range if the token reports an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

/// Scanner that walks a null-terminated wide string and collects tokens.
/// It keeps a raw pointer into the string passed to the constructor and does
/// not copy it, so that string must stay alive while Tokenize() runs.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the rest of the expression: numbers and operators become
    /// tokens, every other character is skipped.
    void Tokenize() {
        while(!EndOfExperssion()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExperssion() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    /// Parses a number at the current position and continues right after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // iswdigit and wcstod are not guaranteed to agree on what a digit
            // is; if nothing was parsed, skip the character so that the
            // scanning loop always makes progress.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    /// Stores the current character as an operator token and steps past it.
    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits expr into number and operator tokens; all other characters are ignored.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

/// Stub: no tokenizing is implemented yet, the call always throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
/// Minimal implementation: every expression yields an empty token list.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone "+" must produce exactly one Plus operator token.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
/// Operators known to the lexer; the value is the character that spells the operator.
enum class Operator : wchar_t {
    Plus = L'+',
};

/// For now a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
/// Renders a token as a one-character string holding its character code.
inline std::wstring ToString(const Token &token) {
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
/// Returns no tokens for an empty expression; otherwise a single operator
/// token built from the first character.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    if(!expr.empty()) {
        tokens.push_back(static_cast<Operator>(expr[0]));
    }
    return tokens;
}
. . . …
.
// A single digit must produce exactly one number token.
TEST_METHOD(Should_tokenize_single_digit) {
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double must report the Number type.
TEST_METHOD(Should_get_type_for_number_token) {
    const Token numberToken(1.2);
    Assert::AreEqual(TokenType::Number, numberToken.Type());
}
. (constant → scalar) .
/// Token that remembers only which kind it is; the constructor arguments
/// themselves are not stored.
class Token {
    TokenType m_type;

public:
    Token(Operator)
        : m_type(TokenType::Operator) {
    }

    Token(double)
        : m_type(TokenType::Number) {
    }

    TokenType Type() const {
        return m_type;
    }
};
… . . . .
.
// An operator token must convert back to the operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    const Token plusToken(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plusToken);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token must convert back to the value it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) {
    const Token numberToken(1.23);
    Assert::AreEqual<double>(1.23, numberToken);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
// A number with a fractional part must become a single number token.
TEST_METHOD(Should_tokenize_floating_point_number) {
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
/// Produces at most one token from the start of the expression: nothing for
/// an empty string, a number parsed by _wtof when the first character is a
/// digit, otherwise an operator built from the first character.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *const first = expr.c_str();
    if(L'\0' == *first) {
        return Tokens();
    }
    if(!iswdigit(*first)) {
        return Tokens{ static_cast<Operator>(*first) };
    }
    return Tokens{ _wtof(first) };
}
. .
// An operator directly followed by a number must produce two tokens.
TEST_METHOD(Should_tokenize_plus_and_number) {
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
/// Same single-token behaviour as before, but the tokens are now gathered in
/// a local list that is returned at the end.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    const wchar_t *const first = expr.c_str();
    if(*first != L'\0') {
        if(iswdigit(*first)) {
            tokens.push_back(_wtof(first));
        } else {
            tokens.push_back(static_cast<Operator>(*first));
        }
    }
    return tokens;
}
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
Interpreter.h #pragma once; #include <vector> #include <wchar.h> #include <algorithm> namespace Interpreter { enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', }; inline std::wstring ToString(const Operator &op) { return{ static_cast<wchar_t>(op) }; } enum class TokenType { Operator, Number }; inline std::wstring ToString(const TokenType &type) { switch(type) { case TokenType::Operator: return L"Operator"; case TokenType::Number: return L"Number"; default: throw std::out_of_range("TokenType"); } } class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } friend inline bool operator==(const Token &left, const Token &right) { if(left.m_type == right.m_type) { switch(left.m_type) { case Interpreter::TokenType::Operator: return left.m_operator == right.m_operator; case Interpreter::TokenType::Number: return left.m_number == right.m_number; default: throw std::out_of_range("TokenType"); } } return false; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: throw std::out_of_range("TokenType"); } } typedef std::vector<Token> Tokens; namespace Lexer { namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { 
ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); } } // namespace Lexer } // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once; #include <vector> namespace Interpreter { struct Token {}; typedef std::vector<Token> Tokens; namespace Lexer { inline Tokens Tokenize(std::string expr) { throw std::exception(); } } // namespace Lexer } // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} β nil) , . (nil β constant) . (constant β constant+) ( , ). (constant β scalar) , . (statement β statements) (break, continue, return ). (unconditional β if) . (scalar β array) / . (array β container) . (statement β recursion) . (if β while) . (expression β function) . (variable β assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
Interpreter.h #pragma once; #include <vector> #include <wchar.h> #include <algorithm> namespace Interpreter { enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', }; inline std::wstring ToString(const Operator &op) { return{ static_cast<wchar_t>(op) }; } enum class TokenType { Operator, Number }; inline std::wstring ToString(const TokenType &type) { switch(type) { case TokenType::Operator: return L"Operator"; case TokenType::Number: return L"Number"; default: throw std::out_of_range("TokenType"); } } class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } friend inline bool operator==(const Token &left, const Token &right) { if(left.m_type == right.m_type) { switch(left.m_type) { case Interpreter::TokenType::Operator: return left.m_operator == right.m_operator; case Interpreter::TokenType::Number: return left.m_number == right.m_number; default: throw std::out_of_range("TokenType"); } } return false; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: throw std::out_of_range("TokenType"); } } typedef std::vector<Token> Tokens; namespace Lexer { namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { 
ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); } } // namespace Lexer } // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once
#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; carries no data yet.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub: unconditionally throws std::exception; no tokenizing is performed.
// @param expr expression text (unused by this stub)
// @throws std::exception always
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
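({} → nil) Replace the absence of code with code that uses nil. (nil → constant) Replace nil with a constant. (constant → constant+) Replace a simple constant with a more complex one (for example, a one-character string with a multi-character string). (constant → scalar) Replace a constant with a variable or an argument. (statement → statements) Add more unconditional statements (break, continue, return and the like). (unconditional → if) Split the execution path with a conditional. (scalar → array) Replace a variable/argument with an array. (array → container) Replace an array with a more complex container. (statement → recursion) Replace a statement with recursion. (if → while) Replace a conditional with a loop. (expression → function) Replace an expression with a function. (variable → assignment) Change the value of a variable.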
.
// Minimal implementation: ignores the expression and yields an empty token list.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone "+" must tokenize to exactly one Plus operator.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// Operators known to the lexer; each enumerator's value is the wide character that spells it.
enum class Operator : wchar_t {
    Plus = L'+'
};

// At this stage a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
// Renders a token as a one-character wide string holding its character code.
inline std::wstring ToString(const Token &token) {
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional β if).
// Returns no tokens for an empty expression; otherwise a single token built
// from the first character, reinterpreted as an Operator.
inline Tokens Tokenize(std::wstring expr) {
    if(!expr.empty()) {
        return{ static_cast<Operator>(expr[0]) };
    }
    return{};
}
. . . β¦
.
// A single digit must tokenize to one number token with that value.
TEST_METHOD(Should_tokenize_single_digit) {
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double must report the Number type.
TEST_METHOD(Should_get_type_for_number_token) {
    const Token number(1.2);
    const TokenType actualType = number.Type();
    Assert::AreEqual(TokenType::Number, actualType);
}
. (constant β scalar) .
// Token that remembers only which kind of value it was built from;
// the value itself is not stored yet.
class Token {
public:
    // Built from an operator: tagged as TokenType::Operator.
    Token(Operator)
        : m_type(TokenType::Operator) {
    }

    // Built from a number: tagged as TokenType::Number.
    Token(double)
        : m_type(TokenType::Number) {
    }

    // Kind recorded at construction.
    TokenType Type() const {
        return m_type;
    }

private:
    TokenType m_type;
};
β¦ . . . .
.
// An operator token must convert back to the operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    const Token plusToken(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plusToken);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token must convert back to the double it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) {
    const Token numberToken(1.23);
    Assert::AreEqual<double>(1.23, numberToken);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
// A decimal literal with a fractional part must tokenize to one number token.
TEST_METHOD(Should_tokenize_floating_point_number) {
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
// Produces at most one token from the start of the expression: nothing for an
// empty string, a number when the text begins with a digit, otherwise the first
// character reinterpreted as an Operator.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *current = expr.c_str();
    if(*current == L'\0') {
        return{};
    }
    if(iswdigit(*current) != 0) {
        return{ _wtof(current) };
    }
    return{ static_cast<Operator>(*current) };
}
. .
// An operator directly followed by a number must yield two tokens in order.
TEST_METHOD(Should_tokenize_plus_and_number) {
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
// Same single-token behaviour as before, but the token is accumulated in a
// result vector instead of being returned directly.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    const wchar_t *current = expr.c_str();
    if(*current != L'\0') {
        if(iswdigit(*current) != 0) {
            result.push_back(_wtof(current));
        } else {
            result.push_back(static_cast<Operator>(*current));
        }
    }
    return result;
}
: (if β while). , .
// Scans the whole expression: a run starting with a digit is parsed by wcstod
// into one number token; every other character becomes an operator token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    for(const wchar_t *current = expr.c_str(); *current != L'\0'; ) {
        if(!iswdigit(*current)) {
            result.push_back(static_cast<Operator>(*current));
            ++current;
            continue;
        }
        // wcstod reports where the number ended, so scanning resumes right after it.
        wchar_t *end = nullptr;
        result.push_back(wcstod(current, &end));
        current = end;
    }
    return result;
}
wcstod
, _wtof
, . , . , .
. .
.
// Spaces around and between tokens must not appear in the result.
TEST_METHOD(Should_skip_spaces) {
    const Tokens actual = Lexer::Tokenize(L" 1 + 12.34 ");
    AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, actual);
}
(unconditional β if) , .
// Main scanning loop: numbers are parsed with wcstod, '+' becomes an operator
// token, and every other character is skipped.
while(*current != L'\0') {
    if(iswdigit(*current)) {
        wchar_t *end = nullptr;
        result.push_back(wcstod(current, &end));
        current = end;
        continue;
    }
    if(*current == static_cast<wchar_t>(Operator::Plus)) {
        result.push_back(static_cast<Operator>(*current));
    }
    // Both the operator and the skipped-character case advance by one.
    ++current;
}
. . Detail
. Tokenize
.
// Public entry point: runs a Detail::Tokenizer over expr and returns a copy of its result.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    Tokens tokens = scanner.Result();
    return tokens;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// Every operator and both parentheses in a single expression.
TEST_METHOD(Should_tokenize_complex_experssion) {
    const Tokens actual = Lexer::Tokenize(L"1+2*3/(4-5)");
    AssertRange::AreEqual({
        Token(1), Token(Operator::Plus),
        Token(2), Token(Operator::Mul),
        Token(3), Token(Operator::Div),
        Token(Operator::LParen),
        Token(4), Token(Operator::Minus), Token(5),
        Token(Operator::RParen)
    }, actual);
}
Operator
, .
// All operators known to the lexer; each enumerator's value is the wide
// character that spells it in an expression.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')'
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
// Header-only lexer: turns a wide-string arithmetic expression into a list of
// number and operator tokens.
#pragma once
#include <vector>
#include <wchar.h>
#include <wctype.h>
#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>

namespace Interpreter {

// Operators known to the lexer; each enumerator's value is the wide character
// that spells it in an expression.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the operator as a one-character wide string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Kind of value a Token carries.
enum class TokenType { Operator, Number };

// Returns the enumerator's name.
// @throws std::out_of_range for a value outside the enumeration.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

// Tagged value: either an Operator or a double, discriminated by m_type.
class Token {
public:
    // Both constructors are implicit on purpose: Tokenizer pushes raw
    // doubles and Operators straight into a Tokens vector.
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    // Kind of value currently stored.
    TokenType Type() const { return m_type; }

    // Returns the stored operator.
    // @throws std::logic_error if the token holds a number.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number.
    // @throws std::logic_error if the token holds an operator.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same stored value.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token: std::to_wstring of the number, or the operator character.
// @throws std::out_of_range for a token whose type is outside TokenType.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Walks a null-terminated wide string and collects number and operator
// tokens; any other character (spaces, for instance) is skipped.
class Tokenizer {
public:
    // Keeps a pointer into expr's character buffer; the text is not copied,
    // so expr has to stay alive while Tokenize() runs.
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the expression up to its terminating null character.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    // Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses one number with wcstod and resumes scanning right after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    // True when the current character is the code of a known operator.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits expr into tokens.
// @param expr expression text, e.g. L"1+2*3/(4-5)"
// @return the number and operator tokens in order of appearance
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp
#include "stdafx.h"
#include "CppUnitTest.h"
#include "Interpreter.h"

// Unit tests for the Interpreter lexer and its Token type.
namespace InterpreterTests {
using namespace Microsoft::VisualStudio::CppUnitTestFramework;
using namespace Interpreter;
using namespace std;

namespace AssertRange {
// Asserts that `actual` holds the same elements, in the same order, as `expect`.
// The element counts are compared first ("Size differs."); the elements are then
// compared pairwise as T, and a failure message names the zero-based position.
template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) {
    auto actualIter = begin(actual);
    auto expectIter = begin(expect);
    Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs.");
    // Stops at the shorter range, so a size mismatch never reads past either end.
    for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) {
        auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter));
        Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str());
    }
}
} // namespace AssertRange

// Behaviour of Lexer::Tokenize on wide-string expressions.
TEST_CLASS(LexerTests) {
public:
    TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) {
        Tokens tokens = Lexer::Tokenize(L"");
        Assert::IsTrue(tokens.empty());
    }
    TEST_METHOD(Should_tokenize_single_plus_operator) {
        Tokens tokens = Lexer::Tokenize(L"+");
        AssertRange::AreEqual({ Operator::Plus }, tokens);
    }
    TEST_METHOD(Should_tokenize_single_digit) {
        Tokens tokens = Lexer::Tokenize(L"1");
        AssertRange::AreEqual({ 1.0 }, tokens);
    }
    TEST_METHOD(Should_tokenize_floating_point_number) {
        Tokens tokens = Lexer::Tokenize(L"12.34");
        AssertRange::AreEqual({ 12.34 }, tokens);
    }
    TEST_METHOD(Should_tokenize_plus_and_number) {
        Tokens tokens = Lexer::Tokenize(L"+12.34");
        AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens);
    }
    // Leading, trailing and inner spaces must not produce tokens.
    TEST_METHOD(Should_skip_spaces) {
        Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 ");
        AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens);
    }
    // Exercises every operator and both parentheses in one expression.
    TEST_METHOD(Should_tokenize_complex_experssion) {
        Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)");
        AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4),
            Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens);
    }
};

// Behaviour of Token: reported type and conversion back to the stored value.
TEST_CLASS(TokenTests) {
public:
    TEST_METHOD(Should_get_type_for_operator_token) {
        Token opToken(Operator::Plus);
        Assert::AreEqual(TokenType::Operator, opToken.Type());
    }
    TEST_METHOD(Should_get_type_for_number_token) {
        Token numToken(1.2);
        Assert::AreEqual(TokenType::Number, numToken.Type());
    }
    TEST_METHOD(Should_get_operator_code_from_operator_token) {
        Token token(Operator::Plus);
        Assert::AreEqual<Operator>(Operator::Plus, token);
    }
    TEST_METHOD(Should_get_number_value_from_number_token) {
        Token token(1.23);
        Assert::AreEqual<double>(1.23, token);
    }
};
}
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once
#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; carries no data yet.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub: unconditionally throws std::exception; no tokenizing is performed.
// @param expr expression text (unused by this stub)
// @throws std::exception always
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
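({} → nil) Replace the absence of code with code that uses nil. (nil → constant) Replace nil with a constant. (constant → constant+) Replace a simple constant with a more complex one (for example, a one-character string with a multi-character string). (constant → scalar) Replace a constant with a variable or an argument. (statement → statements) Add more unconditional statements (break, continue, return and the like). (unconditional → if) Split the execution path with a conditional. (scalar → array) Replace a variable/argument with an array. (array → container) Replace an array with a more complex container. (statement → recursion) Replace a statement with recursion. (if → while) Replace a conditional with a loop. (expression → function) Replace an expression with a function. (variable → assignment) Change the value of a variable.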
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
// Header-only lexer: turns a wide-string arithmetic expression into a list of
// number and operator tokens.
#pragma once
#include <vector>
#include <wchar.h>
#include <wctype.h>
#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>

namespace Interpreter {

// Operators known to the lexer; each enumerator's value is the wide character
// that spells it in an expression.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the operator as a one-character wide string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Kind of value a Token carries.
enum class TokenType { Operator, Number };

// Returns the enumerator's name.
// @throws std::out_of_range for a value outside the enumeration.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

// Tagged value: either an Operator or a double, discriminated by m_type.
class Token {
public:
    // Both constructors are implicit on purpose: Tokenizer pushes raw
    // doubles and Operators straight into a Tokens vector.
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    // Kind of value currently stored.
    TokenType Type() const { return m_type; }

    // Returns the stored operator.
    // @throws std::logic_error if the token holds a number.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number.
    // @throws std::logic_error if the token holds an operator.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same stored value.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token: std::to_wstring of the number, or the operator character.
// @throws std::out_of_range for a token whose type is outside TokenType.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Walks a null-terminated wide string and collects number and operator
// tokens; any other character (spaces, for instance) is skipped.
class Tokenizer {
public:
    // Keeps a pointer into expr's character buffer; the text is not copied,
    // so expr has to stay alive while Tokenize() runs.
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the expression up to its terminating null character.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    // Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses one number with wcstod and resumes scanning right after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    // True when the current character is the code of a known operator.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits expr into tokens.
// @param expr expression text, e.g. L"1+2*3/(4-5)"
// @return the number and operator tokens in order of appearance
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once
#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; carries no data yet.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub: unconditionally throws std::exception; no tokenizing is performed.
// @param expr expression text (unused by this stub)
// @throws std::exception always
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
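({} → nil) Replace the absence of code with code that uses nil. (nil → constant) Replace nil with a constant. (constant → constant+) Replace a simple constant with a more complex one (for example, a one-character string with a multi-character string). (constant → scalar) Replace a constant with a variable or an argument. (statement → statements) Add more unconditional statements (break, continue, return and the like). (unconditional → if) Split the execution path with a conditional. (scalar → array) Replace a variable/argument with an array. (array → container) Replace an array with a more complex container. (statement → recursion) Replace a statement with recursion. (if → while) Replace a conditional with a loop. (expression → function) Replace an expression with a function. (variable → assignment) Change the value of a variable.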
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
// Operators known to the lexer; each enumerator's value is its source character.
enum class Operator : wchar_t
{
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators known to the lexer; each enumerator's value is its source character.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns a one-character string holding the operator's character.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

enum class TokenType { Operator, Number };

// Returns the enumerator name; throws std::out_of_range for an unknown value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator: return L"Operator";
        case TokenType::Number: return L"Number";
        default: throw std::out_of_range("TokenType");
    }
}

// A lexer token: either an operator or a floating-point number.
// Constructors and conversions are deliberately implicit so tokens can be
// built from, and compared against, plain operators and doubles.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Throws std::logic_error when the token is not an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error when the token is not a number.
    operator double() const {
        if(m_type != TokenType::Number) throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when both type and payload match.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator: return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number: return left.m_number == right.m_number;
                default: throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Formats a number via std::to_wstring and an operator via ToString(Operator).
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number: return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator: return ToString(static_cast<Operator>(token));
        default: throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Walks a null-terminated wide string, collecting number and operator tokens
// and skipping every other character.
class Tokenizer {
public:
    // Keeps a pointer into expr's buffer; expr must outlive the tokenizer.
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the expression up to its terminator, appending tokens to the result.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // wcstod consumed nothing even though iswdigit accepted the character;
            // skip it like any other unknown character so the scan always advances.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits expr into number and operator tokens; unrecognised characters are ignored.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; carries no data yet.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub: always throws std::exception (presumably so the first test starts out failing).
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
// Ignores its input and always yields an empty token list.
inline Tokens Tokenize(std::string expr)
{
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// An expression holding only "+" must yield a single Plus token.
TEST_METHOD(Should_tokenize_single_plus_operator)
{
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// The only operator so far; its value is the character it is written with.
enum class Operator : wchar_t
{
    Plus = L'+',
};

// For now a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
// Renders a token as a one-character string holding its character code.
inline std::wstring ToString(const Token &token)
{
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
// Empty input yields no tokens; otherwise the first character becomes one operator token.
inline Tokens Tokenize(std::wstring expr)
{
    Tokens result;
    if(!expr.empty())
    {
        result.push_back(static_cast<Operator>(expr[0]));
    }
    return result;
}
. . . β¦
.
// A lone digit must yield one number token with that value.
TEST_METHOD(Should_tokenize_single_digit)
{
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double must report the Number type.
TEST_METHOD(Should_get_type_for_number_token)
{
    const Token numberToken(1.2);
    Assert::AreEqual(TokenType::Number, numberToken.Type());
}
. (constant → scalar) .
// A token that remembers only which kind of value it was built from.
class Token
{
public:
    // The constructor argument selects the type; its value is not stored yet.
    Token(Operator) : m_type(TokenType::Operator) {}
    Token(double) : m_type(TokenType::Number) {}

    TokenType Type() const { return m_type; }

private:
    TokenType m_type;
};
β¦ . . . .
.
// An operator token must convert back to the operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token)
{
    const Token plusToken(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plusToken);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token must convert back to the value it was built from.
TEST_METHOD(Should_get_number_value_from_number_token)
{
    const Token numberToken(1.23);
    Assert::AreEqual<double>(1.23, numberToken);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
// A single ASCII digit becomes a number token; anything else is treated as an operator.
if('0' <= expr[0] && expr[0] <= '9')
    return{ static_cast<double>(expr[0]) - '0' };
return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
// A number with a fractional part must come back as a single number token.
TEST_METHOD(Should_tokenize_floating_point_number)
{
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
// Produces at most one token: nothing for empty input, a number parsed by _wtof
// when the text starts with a digit, otherwise an operator from the first character.
inline Tokens Tokenize(std::wstring expr)
{
    const wchar_t *const first = expr.c_str();
    if(*first == L'\0')
        return Tokens();
    if(iswdigit(*first))
        return Tokens{ _wtof(first) };
    return Tokens{ static_cast<Operator>(*first) };
}
. .
// An operator directly followed by a number must yield two tokens.
TEST_METHOD(Should_tokenize_plus_and_number)
{
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
// Same single-token behaviour as before, now collecting into a result vector.
inline Tokens Tokenize(std::wstring expr)
{
    Tokens result;
    const wchar_t *current = expr.c_str();
    if(*current != L'\0')
    {
        if(iswdigit(*current))
            result.push_back(_wtof(current));
        else
            result.push_back(static_cast<Operator>(*current));
    }
    return result;
}
: (if → while). , .
// Scans the whole expression: a run starting with a digit is parsed by wcstod
// into a number token; every other character becomes an operator token.
inline Tokens Tokenize(std::wstring expr)
{
    Tokens result;
    const wchar_t *cursor = expr.c_str();
    while(*cursor != L'\0')
    {
        if(!iswdigit(*cursor))
        {
            result.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        // wcstod reports where the number ended, so scanning resumes right after it.
        wchar_t *numberEnd = nullptr;
        result.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;
    }
    return result;
}
wcstod
, _wtof
, . , . , .
. .
.
// Blanks around numbers and operators must not produce tokens.
TEST_METHOD(Should_skip_spaces)
{
    const Tokens actual = Lexer::Tokenize(L" 1 + 12.34 ");
    AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, actual);
}
(unconditional → if) , .
// Numbers and '+' produce tokens; any other character is skipped.
while(*current)
{
    if(iswdigit(*current))
    {
        wchar_t *end = nullptr;
        result.push_back(wcstod(current, &end));
        current = end;
        continue;
    }
    if(static_cast<Operator>(*current) == Operator::Plus)
    {
        result.push_back(Operator::Plus);
    }
    ++current;
}
. . Detail
. Tokenize
.
// Runs a Detail::Tokenizer over expr and returns a copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr)
{
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    const Tokens &collected = scanner.Result();
    return collected;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// Every arithmetic operator and both parentheses must be recognised.
TEST_METHOD(Should_tokenize_complex_experssion)
{
    const Tokens actual = Lexer::Tokenize(L"1+2*3/(4-5)");
    AssertRange::AreEqual({
        Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3),
        Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus),
        Token(5), Token(Operator::RParen)
    }, actual);
}
Operator
, .
// Operators known to the lexer; each enumerator's value is its source character.
enum class Operator : wchar_t
{
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators known to the lexer; each enumerator's value is its source character.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns a one-character string holding the operator's character.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

enum class TokenType { Operator, Number };

// Returns the enumerator name; throws std::out_of_range for an unknown value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator: return L"Operator";
        case TokenType::Number: return L"Number";
        default: throw std::out_of_range("TokenType");
    }
}

// A lexer token: either an operator or a floating-point number.
// Constructors and conversions are deliberately implicit so tokens can be
// built from, and compared against, plain operators and doubles.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Throws std::logic_error when the token is not an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error when the token is not a number.
    operator double() const {
        if(m_type != TokenType::Number) throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when both type and payload match.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator: return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number: return left.m_number == right.m_number;
                default: throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Formats a number via std::to_wstring and an operator via ToString(Operator).
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number: return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator: return ToString(static_cast<Operator>(token));
        default: throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Walks a null-terminated wide string, collecting number and operator tokens
// and skipping every other character.
class Tokenizer {
public:
    // Keeps a pointer into expr's buffer; expr must outlive the tokenizer.
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the expression up to its terminator, appending tokens to the result.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // wcstod consumed nothing even though iswdigit accepted the character;
            // skip it like any other unknown character so the scan always advances.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits expr into number and operator tokens; unrecognised characters are ignored.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; carries no data yet.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub: always throws std::exception (presumably so the first test starts out failing).
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
// Ignores its input and always yields an empty token list.
inline Tokens Tokenize(std::string expr)
{
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// An expression holding only "+" must yield a single Plus token.
TEST_METHOD(Should_tokenize_single_plus_operator)
{
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// The only operator so far; its value is the character it is written with.
enum class Operator : wchar_t
{
    Plus = L'+',
};

// For now a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
// Renders a token as a one-character string holding its character code.
inline std::wstring ToString(const Token &token)
{
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
// Empty input yields no tokens; otherwise the first character becomes one operator token.
inline Tokens Tokenize(std::wstring expr)
{
    Tokens result;
    if(!expr.empty())
    {
        result.push_back(static_cast<Operator>(expr[0]));
    }
    return result;
}
. . . β¦
.
// A lone digit must yield one number token with that value.
TEST_METHOD(Should_tokenize_single_digit)
{
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double must report the Number type.
TEST_METHOD(Should_get_type_for_number_token)
{
    const Token numberToken(1.2);
    Assert::AreEqual(TokenType::Number, numberToken.Type());
}
. (constant → scalar) .
// A token that remembers only which kind of value it was built from.
class Token
{
public:
    // The constructor argument selects the type; its value is not stored yet.
    Token(Operator) : m_type(TokenType::Operator) {}
    Token(double) : m_type(TokenType::Number) {}

    TokenType Type() const { return m_type; }

private:
    TokenType m_type;
};
β¦ . . . .
.
// An operator token must convert back to the operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token)
{
    const Token plusToken(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plusToken);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token must convert back to the value it was built from.
TEST_METHOD(Should_get_number_value_from_number_token)
{
    const Token numberToken(1.23);
    Assert::AreEqual<double>(1.23, numberToken);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
// A single ASCII digit becomes a number token; anything else is treated as an operator.
if('0' <= expr[0] && expr[0] <= '9')
    return{ static_cast<double>(expr[0]) - '0' };
return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
// A number with a fractional part must come back as a single number token.
TEST_METHOD(Should_tokenize_floating_point_number)
{
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
// Produces at most one token: nothing for empty input, a number parsed by _wtof
// when the text starts with a digit, otherwise an operator from the first character.
inline Tokens Tokenize(std::wstring expr)
{
    const wchar_t *const first = expr.c_str();
    if(*first == L'\0')
        return Tokens();
    if(iswdigit(*first))
        return Tokens{ _wtof(first) };
    return Tokens{ static_cast<Operator>(*first) };
}
. .
// An operator directly followed by a number must yield two tokens.
TEST_METHOD(Should_tokenize_plus_and_number)
{
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
// Same single-token behaviour as before, now collecting into a result vector.
inline Tokens Tokenize(std::wstring expr)
{
    Tokens result;
    const wchar_t *current = expr.c_str();
    if(*current != L'\0')
    {
        if(iswdigit(*current))
            result.push_back(_wtof(current));
        else
            result.push_back(static_cast<Operator>(*current));
    }
    return result;
}
: (if → while). , .
// Scans the whole expression: a run starting with a digit is parsed by wcstod
// into a number token; every other character becomes an operator token.
inline Tokens Tokenize(std::wstring expr)
{
    Tokens result;
    const wchar_t *cursor = expr.c_str();
    while(*cursor != L'\0')
    {
        if(!iswdigit(*cursor))
        {
            result.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        // wcstod reports where the number ended, so scanning resumes right after it.
        wchar_t *numberEnd = nullptr;
        result.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;
    }
    return result;
}
wcstod
, _wtof
, . , . , .
. .
.
// Blanks around numbers and operators must not produce tokens.
TEST_METHOD(Should_skip_spaces)
{
    const Tokens actual = Lexer::Tokenize(L" 1 + 12.34 ");
    AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, actual);
}
(unconditional → if) , .
// Numbers and '+' produce tokens; any other character is skipped.
while(*current)
{
    if(iswdigit(*current))
    {
        wchar_t *end = nullptr;
        result.push_back(wcstod(current, &end));
        current = end;
        continue;
    }
    if(static_cast<Operator>(*current) == Operator::Plus)
    {
        result.push_back(Operator::Plus);
    }
    ++current;
}
. . Detail
. Tokenize
.
// Runs a Detail::Tokenizer over expr and returns a copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr)
{
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    const Tokens &collected = scanner.Result();
    return collected;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// Every arithmetic operator and both parentheses must be recognised.
TEST_METHOD(Should_tokenize_complex_experssion)
{
    const Tokens actual = Lexer::Tokenize(L"1+2*3/(4-5)");
    AssertRange::AreEqual({
        Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3),
        Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus),
        Token(5), Token(Operator::RParen)
    }, actual);
}
Operator
, .
// Operators known to the lexer; each enumerator's value is its source character.
enum class Operator : wchar_t
{
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once
#include <algorithm>
#include <cwchar>
#include <cwctype>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>

namespace Interpreter {

/// Operator and parenthesis symbols. Each enumerator's value is the character
/// that spells it, so an input character can be converted with a static_cast.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character string that spells `op`.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Tells which kind of value a Token holds.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator name of `type`.
/// @throws std::out_of_range if `type` is not one of the known enumerators.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

/// A value that is either an Operator or a double, tagged with its TokenType.
/// The converting constructors are implicit on purpose: the lexer and the
/// tests build tokens directly from operators and numbers.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// @throws std::logic_error if the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error if the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when both the type and the stored value match.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is ever written or read.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Formats a token: numbers through std::to_wstring, operators as their symbol.
/// @throws std::out_of_range if the token reports an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

/// Scans a wide-character expression from left to right and collects tokens.
/// Only a pointer into the string passed to the constructor is kept, so that
/// string has to stay alive while Tokenize() runs.
class Tokenizer {
public:
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the whole expression. Characters that start neither a number
    /// nor an operator (spaces, for instance) are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // wcstod reports where the number ended, so scanning resumes right after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits `expr` into number and operator tokens, in input order.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once
#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Stub lexer entry point: it ignores `expr` and always throws.
/// @throws std::exception unconditionally.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
/// Returns an empty token list for any input.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone "+" must produce exactly one Plus operator token.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
/// Operator symbols; the enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+'
};

// For now a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
/// Returns a one-character string holding the token's value converted to wchar_t.
inline std::wstring ToString(const Token &token) {
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
/// Returns no tokens for an empty string; otherwise a single operator token
/// built from the first character of `expr`.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    if(!expr.empty()) {
        tokens.push_back(static_cast<Operator>(expr[0]));
    }
    return tokens;
}
. . . …
.
// A single digit must produce one number token with that value.
TEST_METHOD(Should_tokenize_single_digit) {
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double reports TokenType::Number.
TEST_METHOD(Should_get_type_for_number_token) {
    const Token numberToken(1.2);
    Assert::AreEqual(TokenType::Number, numberToken.Type());
}
. (constant → scalar) .
/// Token that records only which kind of value it was built from;
/// the value itself is not stored.
class Token {
    TokenType m_type;

public:
    Token(Operator)
        : m_type(TokenType::Operator)
    {}

    Token(double)
        : m_type(TokenType::Number)
    {}

    /// Kind selected by the constructor that was used.
    TokenType Type() const {
        return m_type;
    }
};
β¦ . . . .
.
// Converting an operator token back to Operator yields the original operator.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    const Token plusToken(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plusToken);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// Converting a number token back to double yields the original value.
TEST_METHOD(Should_get_number_value_from_number_token) {
    const Token numberToken(1.23);
    Assert::AreEqual<double>(1.23, numberToken);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
// A number with a fractional part must be read as one number token.
TEST_METHOD(Should_tokenize_floating_point_number) {
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
/// Produces at most one token from the start of `expr`: nothing for an empty
/// string, a number parsed by _wtof when the first character is a digit,
/// otherwise an operator built from the first character.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *const first = expr.c_str();
    if(*first == L'\0') {
        return{};
    }
    if(!iswdigit(*first)) {
        return{ static_cast<Operator>(*first) };
    }
    return{ _wtof(first) };
}
. .
// An operator directly followed by a number must give two separate tokens.
TEST_METHOD(Should_tokenize_plus_and_number) {
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
/// Collects at most one token from the start of `expr`: a number parsed by
/// _wtof when the first character is a digit, otherwise an operator built
/// from the first character. An empty string gives an empty list.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    const wchar_t *first = expr.c_str();
    if(*first != L'\0') {
        if(iswdigit(*first)) {
            tokens.push_back(_wtof(first));
        }
        else {
            tokens.push_back(static_cast<Operator>(*first));
        }
    }
    return tokens;
}
: (if → while). , .
/// Splits `expr` into tokens. A character for which iswdigit is true starts a
/// number that is read with wcstod; every other character is stored as an
/// operator token (no filtering is done at this stage).
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    for(const wchar_t *cursor = expr.c_str(); *cursor != L'\0'; ) {
        if(!iswdigit(*cursor)) {
            tokens.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        // wcstod reports where the number ended, so scanning resumes right after it.
        wchar_t *numberEnd = nullptr;
        tokens.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once
#include <algorithm>
#include <cwchar>
#include <cwctype>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>

namespace Interpreter {

/// Operator and parenthesis symbols. Each enumerator's value is the character
/// that spells it, so an input character can be converted with a static_cast.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character string that spells `op`.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Tells which kind of value a Token holds.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator name of `type`.
/// @throws std::out_of_range if `type` is not one of the known enumerators.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

/// A value that is either an Operator or a double, tagged with its TokenType.
/// The converting constructors are implicit on purpose: the lexer and the
/// tests build tokens directly from operators and numbers.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// @throws std::logic_error if the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error if the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when both the type and the stored value match.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is ever written or read.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Formats a token: numbers through std::to_wstring, operators as their symbol.
/// @throws std::out_of_range if the token reports an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

/// Scans a wide-character expression from left to right and collects tokens.
/// Only a pointer into the string passed to the constructor is kept, so that
/// string has to stay alive while Tokenize() runs.
class Tokenizer {
public:
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the whole expression. Characters that start neither a number
    /// nor an operator (spaces, for instance) are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // wcstod reports where the number ended, so scanning resumes right after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits `expr` into number and operator tokens, in input order.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once
#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Stub lexer entry point: it ignores `expr` and always throws.
/// @throws std::exception unconditionally.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} β nil) , . (nil β constant) . (constant β constant+) ( , ). (constant β scalar) , . (statement β statements) (break, continue, return ). (unconditional β if) . (scalar β array) / . (array β container) . (statement β recursion) . (if β while) . (expression β function) . (variable β assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once
#include <algorithm>
#include <cwchar>
#include <cwctype>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>

namespace Interpreter {

/// Operator and parenthesis symbols. Each enumerator's value is the character
/// that spells it, so an input character can be converted with a static_cast.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character string that spells `op`.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Tells which kind of value a Token holds.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator name of `type`.
/// @throws std::out_of_range if `type` is not one of the known enumerators.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

/// A value that is either an Operator or a double, tagged with its TokenType.
/// The converting constructors are implicit on purpose: the lexer and the
/// tests build tokens directly from operators and numbers.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// @throws std::logic_error if the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error if the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when both the type and the stored value match.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is ever written or read.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Formats a token: numbers through std::to_wstring, operators as their symbol.
/// @throws std::out_of_range if the token reports an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

/// Scans a wide-character expression from left to right and collects tokens.
/// Only a pointer into the string passed to the constructor is kept, so that
/// string has to stay alive while Tokenize() runs.
class Tokenizer {
public:
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the whole expression. Characters that start neither a number
    /// nor an operator (spaces, for instance) are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // wcstod reports where the number ended, so scanning resumes right after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits `expr` into number and operator tokens, in input order.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

/// Sequence of tokens produced by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Splits an expression into tokens.
/// Not implemented yet: unconditionally throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
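({} → nil) from no code at all to code that employs nil. (nil → constant) from nil to a constant. (constant → constant+) from a simple constant to a more complex constant. (constant → scalar) from a constant to a variable or an argument. (statement → statements) adding more unconditional statements (break, continue, return, and so on). (unconditional → if) splitting the execution path. (scalar → array) from a variable/argument to an array. (array → container) from an array to a more complex container. (statement → recursion) from a statement to recursion. (if → while) from a conditional to a loop. (expression → function) replacing an expression with a function or algorithm. (variable → assignment) replacing the value of a variable.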
.
/// Splits an expression into tokens.
/// At this step every input yields an empty token list.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone "+" must come back as exactly one Plus operator token.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    AssertRange::AreEqual({ Operator::Plus }, Lexer::Tokenize(L"+"));
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
/// Supported operators; each enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+'
};

/// For now a token is simply an operator.
using Token = Operator;
, , Assert
, ToString
, .
// Listing: std::wstring ToString(const Token &)
/// Renders a token as the one-character string given by its underlying value.
inline std::wstring ToString(const Token &token) {
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
/// Splits an expression into tokens.
/// An empty expression yields no tokens; otherwise the first character is
/// taken as an operator and returned as the only token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    if(!expr.empty()) {
        result.push_back(static_cast<Operator>(expr.front()));
    }
    return result;
}
. . . …
.
// A single digit must come back as one number token holding its value.
TEST_METHOD(Should_tokenize_single_digit) {
    AssertRange::AreEqual({ 1.0 }, Lexer::Tokenize(L"1"));
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double reports the Number type.
TEST_METHOD(Should_get_type_for_number_token) {
    const Token numberToken(1.2);
    Assert::AreEqual(TokenType::Number, numberToken.Type());
}
. (constant → scalar) .
/// Token that remembers which kind it was constructed as; the operator or
/// number value itself is not stored yet.
class Token {
    TokenType m_type; // private: class members are private by default

public:
    Token(Operator) : m_type(TokenType::Operator) {}
    Token(double) : m_type(TokenType::Number) {}

    TokenType Type() const {
        return m_type;
    }
};
… . . . .
.
// An operator token converts back to the operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    const Token plusToken(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plusToken);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token converts back to the double it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) {
    const Token numberToken(1.23);
    Assert::AreEqual<double>(1.23, numberToken);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
// A multi-digit number with a fractional part is a single number token.
TEST_METHOD(Should_tokenize_floating_point_number) {
    AssertRange::AreEqual({ 12.34 }, Lexer::Tokenize(L"12.34"));
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
/// Splits an expression into tokens.
/// Produces at most one token: a number parsed by _wtof when the expression
/// starts with a digit, otherwise the first character as an operator.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *const first = expr.c_str();
    if(*first == L'\0') {
        return Tokens();
    }
    return{ iswdigit(*first) ? Token(_wtof(first)) : Token(static_cast<Operator>(*first)) };
}
. .
// An operator immediately followed by a number yields two tokens in order.
TEST_METHOD(Should_tokenize_plus_and_number) {
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, Lexer::Tokenize(L"+12.34"));
}
, . , . . result
, .
/// Splits an expression into tokens, collecting them in a result vector.
/// Still produces at most one token, taken from the start of the expression.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    const wchar_t *const head = expr.c_str();
    if(*head != L'\0') {
        if(iswdigit(*head)) {
            tokens.push_back(_wtof(head));
        } else {
            tokens.push_back(static_cast<Operator>(*head));
        }
    }
    return tokens;
}
: (if → while). , .
/// Splits an expression into tokens by walking it to the terminating null.
/// A digit starts a number, whose extent is decided by wcstod; every other
/// character becomes an operator token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    for(const wchar_t *cursor = expr.c_str(); *cursor != L'\0'; ) {
        if(!iswdigit(*cursor)) {
            tokens.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        // wcstod reports where the number ends; resume scanning from there.
        wchar_t *numberEnd = nullptr;
        tokens.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
// Spaces around and between tokens must not produce tokens of their own.
TEST_METHOD(Should_skip_spaces) {
    AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) },
                          Lexer::Tokenize(L" 1 + 12.34 "));
}
(unconditional → if) , .
while(*current) {
    if(iswdigit(*current)) {
        // wcstod reports where the number ends; resume scanning from there.
        wchar_t *numberEnd = nullptr;
        result.push_back(wcstod(current, &numberEnd));
        current = numberEnd;
        continue;
    }
    // Only '+' is recognized as an operator here; any other character is skipped.
    if(*current == static_cast<wchar_t>(Operator::Plus)) {
        result.push_back(static_cast<Operator>(*current));
    }
    ++current;
}
. . Detail
. Tokenize
.
/// Splits an expression into tokens by running a Detail::Tokenizer over it
/// and returning a copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    const Tokens &scanned = scanner.Result();
    return scanned;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// Every operator and both parentheses are recognized in a full expression.
TEST_METHOD(Should_tokenize_complex_experssion) {
    const Tokens actual = Lexer::Tokenize(L"1+2*3/(4-5)");
    AssertRange::AreEqual({
        Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3),
        Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus),
        Token(5), Token(Operator::RParen) }, actual);
}
Operator
, .
/// Operators and parentheses; each enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')'
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Listing: Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators and parentheses; each enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character string that spells the operator.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Kinds of token the lexer can produce.
enum class TokenType {
    Operator,
    Number
};

/// Returns the name of a token type.
/// @throws std::out_of_range for a value that is not a known enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

/// Tagged value holding either an operator or a number.
/// Construction from Operator/double and conversion back are implicit; the
/// conversions throw std::logic_error when the token holds the other kind.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    /// Kind of value currently stored.
    TokenType Type() const { return m_type; }

    /// @throws std::logic_error if this is not an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error if this is not a number token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same stored
    /// value; numbers are compared exactly.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Renders a token as text: the number via std::to_wstring, or the operator's character.
/// @throws std::out_of_range for a token whose type is not a known enumerator.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

/// Sequence of tokens produced by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

/// Walks a null-terminated wide string and collects the tokens found in it.
/// Keeps a raw pointer into the string passed to the constructor, so that
/// string must stay alive while Tokenize() runs.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Scans to the end of the expression; characters that start neither a
    /// number nor an operator are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // wcstod reports where the number ends; scanning resumes from there.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    // True when the current character is the code of one of the known operators.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits an expression into number and operator tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

/// Sequence of tokens produced by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Splits an expression into tokens.
/// Not implemented yet: unconditionally throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
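({} → nil) from no code at all to code that employs nil. (nil → constant) from nil to a constant. (constant → constant+) from a simple constant to a more complex constant. (constant → scalar) from a constant to a variable or an argument. (statement → statements) adding more unconditional statements (break, continue, return, and so on). (unconditional → if) splitting the execution path. (scalar → array) from a variable/argument to an array. (array → container) from an array to a more complex container. (statement → recursion) from a statement to recursion. (if → while) from a conditional to a loop. (expression → function) replacing an expression with a function or algorithm. (variable → assignment) replacing the value of a variable.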
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Listing: Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators and parentheses; each enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character string that spells the operator.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Kinds of token the lexer can produce.
enum class TokenType {
    Operator,
    Number
};

/// Returns the name of a token type.
/// @throws std::out_of_range for a value that is not a known enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

/// Tagged value holding either an operator or a number.
/// Construction from Operator/double and conversion back are implicit; the
/// conversions throw std::logic_error when the token holds the other kind.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    /// Kind of value currently stored.
    TokenType Type() const { return m_type; }

    /// @throws std::logic_error if this is not an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error if this is not a number token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same stored
    /// value; numbers are compared exactly.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Renders a token as text: the number via std::to_wstring, or the operator's character.
/// @throws std::out_of_range for a token whose type is not a known enumerator.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

/// Sequence of tokens produced by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

/// Walks a null-terminated wide string and collects the tokens found in it.
/// Keeps a raw pointer into the string passed to the constructor, so that
/// string must stay alive while Tokenize() runs.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Scans to the end of the expression; characters that start neither a
    /// number nor an operator are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // wcstod reports where the number ends; scanning resumes from there.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    // True when the current character is the code of one of the known operators.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits an expression into number and operator tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

/// Sequence of tokens produced by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Splits an expression into tokens.
/// Not implemented yet: unconditionally throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
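({} → nil) from no code at all to code that employs nil. (nil → constant) from nil to a constant. (constant → constant+) from a simple constant to a more complex constant. (constant → scalar) from a constant to a variable or an argument. (statement → statements) adding more unconditional statements (break, continue, return, and so on). (unconditional → if) splitting the execution path. (scalar → array) from a variable/argument to an array. (array → container) from an array to a more complex container. (statement → recursion) from a statement to recursion. (if → while) from a conditional to a loop. (expression → function) replacing an expression with a function or algorithm. (variable → assignment) replacing the value of a variable.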
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
// Operators the lexer knows about. The underlying value of each enumerator
// is the wide character that spells it in an expression.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')'
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators the lexer knows about. The underlying value of each enumerator
// is the wide character that spells it in an expression.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the one-character spelling of `op`.
inline std::wstring ToString(const Operator &op) {
    return std::wstring(1, static_cast<wchar_t>(op));
}

// Discriminator for the two kinds of token.
enum class TokenType { Operator, Number };

// Returns the enumerator name; throws std::out_of_range for any other value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator: return L"Operator";
    case TokenType::Number: return L"Number";
    default: throw std::out_of_range("TokenType");
    }
}

// A lexer token: either an operator or a number. m_type records which member
// of the anonymous union is active. Construction and conversion are
// intentionally implicit so tokens can be written and compared as plain
// Operator / double values.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Throws std::logic_error when the token is not an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error when the token is not a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token: std::to_wstring for numbers, the operator character
// for operators. Throws std::out_of_range for an unknown token type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

// Scanner that walks a null-terminated wide string one character at a time.
// Digit-led runs become number tokens, operator characters become operator
// tokens, and every other character is skipped. Only a pointer into the
// source string is kept, so that string has to stay alive during the scan.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Scans up to the terminating null, appending tokens to the result.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // wcstod reports where the number ends; scanning resumes from there.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Lexer entry point: returns the tokens found in `expr`.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type for the first failing test.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub: always throws std::exception, so the first test fails until a real
// implementation is written.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
// Simplest implementation that passes the first test: the result is always
// an empty token list, whatever `expr` holds.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone "+" yields exactly one operator token.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    AssertRange::AreEqual({ Operator::Plus }, Lexer::Tokenize(L"+"));
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// Only '+' exists at this stage; the enumerator's value is its character.
enum class Operator : wchar_t {
    Plus = L'+'
};

// For now a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
// Renders a token as a one-character string holding its character value.
inline std::wstring ToString(const Token &token) {
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
// Empty input gives no tokens; otherwise the first character alone is
// returned as an operator token.
inline Tokens Tokenize(std::wstring expr) {
    if(!expr.empty()) {
        return{ static_cast<Operator>(expr[0]) };
    }
    return Tokens();
}
. . . …
.
// A single digit yields one number token.
TEST_METHOD(Should_tokenize_single_digit) {
    AssertRange::AreEqual({ 1.0 }, Lexer::Tokenize(L"1"));
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double reports TokenType::Number.
TEST_METHOD(Should_get_type_for_number_token) {
    const Token numberToken(1.2);
    Assert::AreEqual(TokenType::Number, numberToken.Type());
}
. (constant → scalar) .
// Token that remembers only its kind: the constructor used decides the
// TokenType, and the argument's value is not stored yet.
class Token {
    TokenType m_type;

public:
    Token(Operator) : m_type(TokenType::Operator) {}
    Token(double) : m_type(TokenType::Number) {}

    TokenType Type() const { return m_type; }
};
… . . . .
.
// An operator token converts back to the Operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    const Token plusToken(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plusToken);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token converts back to the double it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) {
    const Token numberToken(1.23);
    Assert::AreEqual<double>(1.23, numberToken);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
// A multi-digit number with a fractional part yields one number token.
TEST_METHOD(Should_tokenize_floating_point_number) {
    AssertRange::AreEqual({ 12.34 }, Lexer::Tokenize(L"12.34"));
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
// Produces at most one token from the start of `expr`: nothing for an empty
// string, a number parsed by _wtof when the first character is a digit, and
// otherwise the first character as an operator.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *const first = expr.c_str();
    if(*first == L'\0') {
        return Tokens();
    }
    if(!iswdigit(*first)) {
        return{ static_cast<Operator>(*first) };
    }
    return{ _wtof(first) };
}
. .
// An operator immediately followed by a number yields two tokens.
TEST_METHOD(Should_tokenize_plus_and_number) {
    AssertRange::AreEqual(
        { Token(Operator::Plus), Token(12.34) },
        Lexer::Tokenize(L"+12.34"));
}
, . , . . result
, .
// Same single-token behavior as before, restructured around an accumulating
// `result` so that the next step can turn the `if` into a loop.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    const wchar_t *current = expr.c_str();
    if(*current != L'\0') {
        if(iswdigit(*current))
            result.push_back(_wtof(current));
        else
            result.push_back(static_cast<Operator>(*current));
    }
    return result;
}
: (if → while). , .
// Splits `expr` into tokens. A character run that starts with a digit is
// parsed as a number by wcstod; every other character is emitted as an
// Operator with that character's value.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    const wchar_t *cursor = expr.c_str();
    while(*cursor != L'\0') {
        if(!iswdigit(*cursor)) {
            tokens.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        // wcstod reports where the number ends; scanning resumes from there.
        wchar_t *numberEnd = nullptr;
        tokens.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
// Spaces before, between and after tokens must not produce tokens.
TEST_METHOD(Should_skip_spaces) {
    AssertRange::AreEqual(
        { Token(1.0), Token(Operator::Plus), Token(12.34) },
        Lexer::Tokenize(L" 1 + 12.34 "));
}
(unconditional → if) , .
// Scan loop: a digit starts a number, '+' is the only operator recognized at
// this stage, and any other character (such as a space) is skipped.
while(*current != L'\0') {
    const wchar_t symbol = *current;
    if(iswdigit(symbol)) {
        wchar_t *end = nullptr;
        result.push_back(wcstod(current, &end));
        current = end;
        continue;
    }
    if(symbol == static_cast<wchar_t>(Operator::Plus)) {
        result.push_back(static_cast<Operator>(symbol));
    }
    ++current;
}
. . Detail
. Tokenize
.
// Lexer entry point: runs a Detail::Tokenizer over `expr` and returns a copy
// of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    Tokens tokens = scanner.Result();
    return tokens;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// Every operator kind plus parentheses, with no separating spaces.
TEST_METHOD(Should_tokenize_complex_experssion) {
    AssertRange::AreEqual(
        {
            Token(1), Token(Operator::Plus),
            Token(2), Token(Operator::Mul),
            Token(3), Token(Operator::Div),
            Token(Operator::LParen),
            Token(4), Token(Operator::Minus), Token(5),
            Token(Operator::RParen)
        },
        Lexer::Tokenize(L"1+2*3/(4-5)"));
}
Operator
, .
// Operators the lexer knows about. The underlying value of each enumerator
// is the wide character that spells it in an expression.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')'
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators the lexer knows about. The underlying value of each enumerator
// is the wide character that spells it in an expression.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the one-character spelling of `op`.
inline std::wstring ToString(const Operator &op) {
    return std::wstring(1, static_cast<wchar_t>(op));
}

// Discriminator for the two kinds of token.
enum class TokenType { Operator, Number };

// Returns the enumerator name; throws std::out_of_range for any other value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator: return L"Operator";
    case TokenType::Number: return L"Number";
    default: throw std::out_of_range("TokenType");
    }
}

// A lexer token: either an operator or a number. m_type records which member
// of the anonymous union is active. Construction and conversion are
// intentionally implicit so tokens can be written and compared as plain
// Operator / double values.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Throws std::logic_error when the token is not an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error when the token is not a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token: std::to_wstring for numbers, the operator character
// for operators. Throws std::out_of_range for an unknown token type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

// Scanner that walks a null-terminated wide string one character at a time.
// Digit-led runs become number tokens, operator characters become operator
// tokens, and every other character is skipped. Only a pointer into the
// source string is kept, so that string has to stay alive during the scan.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Scans up to the terminating null, appending tokens to the result.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // wcstod reports where the number ends; scanning resumes from there.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Lexer entry point: returns the tokens found in `expr`.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type for the first failing test.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub: always throws std::exception, so the first test fails until a real
// implementation is written.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
// Simplest implementation that passes the first test: the result is always
// an empty token list, whatever `expr` holds.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone "+" yields exactly one operator token.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    AssertRange::AreEqual({ Operator::Plus }, Lexer::Tokenize(L"+"));
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// Only '+' exists at this stage; the enumerator's value is its character.
enum class Operator : wchar_t {
    Plus = L'+'
};

// For now a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
// Renders a token as a one-character string holding its character value.
inline std::wstring ToString(const Token &token) {
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
// Empty input gives no tokens; otherwise the first character alone is
// returned as an operator token.
inline Tokens Tokenize(std::wstring expr) {
    if(!expr.empty()) {
        return{ static_cast<Operator>(expr[0]) };
    }
    return Tokens();
}
. . . …
.
// A single digit yields one number token.
TEST_METHOD(Should_tokenize_single_digit) {
    AssertRange::AreEqual({ 1.0 }, Lexer::Tokenize(L"1"));
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double reports TokenType::Number.
TEST_METHOD(Should_get_type_for_number_token) {
    const Token numberToken(1.2);
    Assert::AreEqual(TokenType::Number, numberToken.Type());
}
. (constant → scalar) .
// Token that remembers only its kind: the constructor used decides the
// TokenType, and the argument's value is not stored yet.
class Token {
    TokenType m_type;

public:
    Token(Operator) : m_type(TokenType::Operator) {}
    Token(double) : m_type(TokenType::Number) {}

    TokenType Type() const { return m_type; }
};
… . . . .
.
// An operator token converts back to the Operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    const Token plusToken(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plusToken);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token converts back to the double it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) {
    const Token numberToken(1.23);
    Assert::AreEqual<double>(1.23, numberToken);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
// A multi-digit number with a fractional part yields one number token.
TEST_METHOD(Should_tokenize_floating_point_number) {
    AssertRange::AreEqual({ 12.34 }, Lexer::Tokenize(L"12.34"));
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
// Produces at most one token from the start of `expr`: nothing for an empty
// string, a number parsed by _wtof when the first character is a digit, and
// otherwise the first character as an operator.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *const first = expr.c_str();
    if(*first == L'\0') {
        return Tokens();
    }
    if(!iswdigit(*first)) {
        return{ static_cast<Operator>(*first) };
    }
    return{ _wtof(first) };
}
. .
// An operator immediately followed by a number yields two tokens.
TEST_METHOD(Should_tokenize_plus_and_number) {
    AssertRange::AreEqual(
        { Token(Operator::Plus), Token(12.34) },
        Lexer::Tokenize(L"+12.34"));
}
, . , . . result
, .
// Same single-token behavior as before, restructured around an accumulating
// `result` so that the next step can turn the `if` into a loop.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    const wchar_t *current = expr.c_str();
    if(*current != L'\0') {
        if(iswdigit(*current))
            result.push_back(_wtof(current));
        else
            result.push_back(static_cast<Operator>(*current));
    }
    return result;
}
: (if → while). , .
// Splits `expr` into tokens. A character run that starts with a digit is
// parsed as a number by wcstod; every other character is emitted as an
// Operator with that character's value.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    const wchar_t *cursor = expr.c_str();
    while(*cursor != L'\0') {
        if(!iswdigit(*cursor)) {
            tokens.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        // wcstod reports where the number ends; scanning resumes from there.
        wchar_t *numberEnd = nullptr;
        tokens.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
// Spaces before, between and after tokens must not produce tokens.
TEST_METHOD(Should_skip_spaces) {
    AssertRange::AreEqual(
        { Token(1.0), Token(Operator::Plus), Token(12.34) },
        Lexer::Tokenize(L" 1 + 12.34 "));
}
(unconditional → if) , .
// Scan loop: a digit starts a number, '+' is the only operator recognized at
// this stage, and any other character (such as a space) is skipped.
while(*current != L'\0') {
    const wchar_t symbol = *current;
    if(iswdigit(symbol)) {
        wchar_t *end = nullptr;
        result.push_back(wcstod(current, &end));
        current = end;
        continue;
    }
    if(symbol == static_cast<wchar_t>(Operator::Plus)) {
        result.push_back(static_cast<Operator>(symbol));
    }
    ++current;
}
. . Detail
. Tokenize
.
// Lexer entry point: runs a Detail::Tokenizer over `expr` and returns a copy
// of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    Tokens tokens = scanner.Result();
    return tokens;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// Every operator kind plus parentheses, with no separating spaces.
TEST_METHOD(Should_tokenize_complex_experssion) {
    AssertRange::AreEqual(
        {
            Token(1), Token(Operator::Plus),
            Token(2), Token(Operator::Mul),
            Token(3), Token(Operator::Div),
            Token(Operator::LParen),
            Token(4), Token(Operator::Minus), Token(5),
            Token(Operator::RParen)
        },
        Lexer::Tokenize(L"1+2*3/(4-5)"));
}
Operator
, .
// Operators the lexer knows about. The underlying value of each enumerator
// is the wide character that spells it in an expression.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')'
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators known to the lexer. Each enumerator's value is the character
// that spells the operator in an expression.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the operator as a one-character string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Tells which kind of value a Token holds.
enum class TokenType {
    Operator,
    Number
};

// Returns the enumerator's name; throws std::out_of_range for any other value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// A lexical token: either an operator or a floating-point number.
class Token {
public:
    // The constructors are implicit on purpose: the tokenizer pushes raw
    // Operator and double values straight into a Tokens vector.
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Throws std::logic_error when the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error when the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Formats the token's payload; throws std::out_of_range for an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

// Single-pass scanner over a null-terminated wide string. It collects number
// and operator tokens; every other character is skipped.
class Tokenizer {
public:
    // Stores a raw pointer into expr's buffer, so expr must stay alive and
    // unmodified while Tokenize() runs.
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Walks the whole expression and appends the tokens found to the result.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // wcstod consumed nothing although iswdigit accepted the
            // character; skip it so the scan loop always makes progress.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits expr into number and operator tokens; unknown characters are ignored.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer

} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub that exists only so the first test compiles and fails: it always
// throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer

} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
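({} → nil): no code at all becomes code that employs nil. (nil → constant): nil is replaced with a constant. (constant → constant+): a simple constant becomes a more complex one. (constant → scalar): a constant is replaced with a variable or an argument. (statement → statements): more unconditional statements are added (break, continue, return and the like). (unconditional → if): the execution path is split by a condition. (scalar → array): a variable or argument becomes an array. (array → container): an array becomes a more complex container. (statement → recursion): a statement is replaced with recursion. (if → while): a condition is replaced with a loop. (expression → function): an expression is replaced with a function or algorithm. (variable → assignment): the value of a variable is replaced.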
.
// Simplest implementation that passes the first test: every expression
// yields an empty token list.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone '+' must come back as a single Plus operator token.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// The only operator so far; its value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
};

// At this stage a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
// Renders the token as a one-character string holding its character code.
inline std::wstring ToString(const Token &token) {
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
// Returns no tokens for an empty expression; otherwise a single operator
// token built from the first character.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    if(!expr.empty()) {
        tokens.push_back(static_cast<Operator>(expr[0]));
    }
    return tokens;
}
. . . β¦
.
// A single digit must come back as one number token.
TEST_METHOD(Should_tokenize_single_digit) {
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double must report the Number type.
TEST_METHOD(Should_get_type_for_number_token) {
    const Token number(1.2);
    Assert::AreEqual(TokenType::Number, number.Type());
}
. (constant → scalar) .
// Token that remembers only which kind of value it was built from; the
// value itself is not stored yet.
class Token {
public:
    Token(Operator)
        : m_type(TokenType::Operator) {
    }

    Token(double)
        : m_type(TokenType::Number) {
    }

    // Kind of value this token was constructed from.
    TokenType Type() const {
        return m_type;
    }

private:
    TokenType m_type;
};
β¦ . . . .
.
// An operator token must convert back to the operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    const Token plus(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plus);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token must convert back to the value it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) {
    const Token number(1.23);
    Assert::AreEqual<double>(1.23, number);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
// A number with a fractional part must come back as one number token.
TEST_METHOD(Should_tokenize_floating_point_number) {
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
// Produces at most one token: nothing for an empty expression, a number
// (parsed with _wtof) when it starts with a digit, otherwise an operator
// built from the first character.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *first = expr.c_str();
    if(*first == L'\0') {
        return Tokens();
    }
    const Token token = iswdigit(*first)
        ? Token(_wtof(first))
        : Token(static_cast<Operator>(*first));
    return Tokens(1, token);
}
. .
// An operator immediately followed by a number must yield two tokens.
TEST_METHOD(Should_tokenize_plus_and_number) {
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
// Still handles only the first token, but collects it into a result list
// so the next step can turn the condition into a loop.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    const wchar_t *first = expr.c_str();
    if(*first != L'\0') {
        if(iswdigit(*first)) {
            tokens.push_back(_wtof(first));
        }
        else {
            tokens.push_back(static_cast<Operator>(*first));
        }
    }
    return tokens;
}
: (if → while). , .
// Splits the expression into tokens: text starting at a digit is parsed as a
// number with wcstod, and every other character is taken as an operator.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    for(const wchar_t *pos = expr.c_str(); *pos != L'\0';) {
        if(!iswdigit(*pos)) {
            tokens.push_back(static_cast<Operator>(*pos));
            ++pos;
            continue;
        }
        // wcstod reports where the number ended; scanning resumes there.
        wchar_t *numberEnd = nullptr;
        tokens.push_back(wcstod(pos, &numberEnd));
        pos = numberEnd;
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
// Spaces around numbers and operators must not produce tokens.
TEST_METHOD(Should_skip_spaces) {
    const Tokens actual = Lexer::Tokenize(L" 1 + 12.34 ");
    AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, actual);
}
(unconditional → if) , .
// Scan loop: numbers are parsed with wcstod, '+' becomes an operator token,
// and any other character (a space, for instance) is skipped.
while(*current != L'\0') {
    const wchar_t symbol = *current;
    if(iswdigit(symbol)) {
        wchar_t *numberEnd = nullptr;
        result.push_back(wcstod(current, &numberEnd));
        current = numberEnd;
        continue;
    }
    if(symbol == static_cast<wchar_t>(Operator::Plus)) {
        result.push_back(static_cast<Operator>(symbol));
    }
    ++current;
}
. . Detail
. Tokenize
.
// Public entry point: hands the scanning over to Detail::Tokenizer and
// returns a copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    const Tokens &collected = scanner.Result();
    return collected;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// All six operators and the numbers between them come back in source order.
TEST_METHOD(Should_tokenize_complex_experssion) {
    const Tokens actual = Lexer::Tokenize(L"1+2*3/(4-5)");
    AssertRange::AreEqual({
        Token(1), Token(Operator::Plus),
        Token(2), Token(Operator::Mul),
        Token(3), Token(Operator::Div),
        Token(Operator::LParen),
        Token(4), Token(Operator::Minus), Token(5),
        Token(Operator::RParen)
    }, actual);
}
Operator
, .
// Operators known to the lexer. Each enumerator's value is the character
// that spells the operator in an expression.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators known to the lexer. Each enumerator's value is the character
// that spells the operator in an expression.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the operator as a one-character string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Tells which kind of value a Token holds.
enum class TokenType {
    Operator,
    Number
};

// Returns the enumerator's name; throws std::out_of_range for any other value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// A lexical token: either an operator or a floating-point number.
class Token {
public:
    // The constructors are implicit on purpose: the tokenizer pushes raw
    // Operator and double values straight into a Tokens vector.
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Throws std::logic_error when the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error when the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Formats the token's payload; throws std::out_of_range for an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

// Single-pass scanner over a null-terminated wide string. It collects number
// and operator tokens; every other character is skipped.
class Tokenizer {
public:
    // Stores a raw pointer into expr's buffer, so expr must stay alive and
    // unmodified while Tokenize() runs.
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Walks the whole expression and appends the tokens found to the result.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // wcstod consumed nothing although iswdigit accepted the
            // character; skip it so the scan loop always makes progress.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits expr into number and operator tokens; unknown characters are ignored.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer

} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub that exists only so the first test compiles and fails: it always
// throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer

} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
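({} → nil): no code at all becomes code that employs nil. (nil → constant): nil is replaced with a constant. (constant → constant+): a simple constant becomes a more complex one. (constant → scalar): a constant is replaced with a variable or an argument. (statement → statements): more unconditional statements are added (break, continue, return and the like). (unconditional → if): the execution path is split by a condition. (scalar → array): a variable or argument becomes an array. (array → container): an array becomes a more complex container. (statement → recursion): a statement is replaced with recursion. (if → while): a condition is replaced with a loop. (expression → function): an expression is replaced with a function or algorithm. (variable → assignment): the value of a variable is replaced.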
.
// Simplest implementation that passes the first test: every expression
// yields an empty token list.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone '+' must come back as a single Plus operator token.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// The only operator so far; its value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
};

// At this stage a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
// Renders the token as a one-character string holding its character code.
inline std::wstring ToString(const Token &token) {
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
// Returns no tokens for an empty expression; otherwise a single operator
// token built from the first character.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    if(!expr.empty()) {
        tokens.push_back(static_cast<Operator>(expr[0]));
    }
    return tokens;
}
. . . β¦
.
// A single digit must come back as one number token.
TEST_METHOD(Should_tokenize_single_digit) {
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double must report the Number type.
TEST_METHOD(Should_get_type_for_number_token) {
    const Token number(1.2);
    Assert::AreEqual(TokenType::Number, number.Type());
}
. (constant → scalar) .
// Token that remembers only which kind of value it was built from; the
// value itself is not stored yet.
class Token {
public:
    Token(Operator)
        : m_type(TokenType::Operator) {
    }

    Token(double)
        : m_type(TokenType::Number) {
    }

    // Kind of value this token was constructed from.
    TokenType Type() const {
        return m_type;
    }

private:
    TokenType m_type;
};
β¦ . . . .
.
// An operator token must convert back to the operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    const Token plus(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plus);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token must convert back to the value it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) {
    const Token number(1.23);
    Assert::AreEqual<double>(1.23, number);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
// A number with a fractional part must come back as one number token.
TEST_METHOD(Should_tokenize_floating_point_number) {
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
// Produces at most one token: nothing for an empty expression, a number
// (parsed with _wtof) when it starts with a digit, otherwise an operator
// built from the first character.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *first = expr.c_str();
    if(*first == L'\0') {
        return Tokens();
    }
    const Token token = iswdigit(*first)
        ? Token(_wtof(first))
        : Token(static_cast<Operator>(*first));
    return Tokens(1, token);
}
. .
// An operator immediately followed by a number must yield two tokens.
TEST_METHOD(Should_tokenize_plus_and_number) {
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
// Still handles only the first token, but collects it into a result list
// so the next step can turn the condition into a loop.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    const wchar_t *first = expr.c_str();
    if(*first != L'\0') {
        if(iswdigit(*first)) {
            tokens.push_back(_wtof(first));
        }
        else {
            tokens.push_back(static_cast<Operator>(*first));
        }
    }
    return tokens;
}
: (if → while). , .
// Splits the expression into tokens: text starting at a digit is parsed as a
// number with wcstod, and every other character is taken as an operator.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    for(const wchar_t *pos = expr.c_str(); *pos != L'\0';) {
        if(!iswdigit(*pos)) {
            tokens.push_back(static_cast<Operator>(*pos));
            ++pos;
            continue;
        }
        // wcstod reports where the number ended; scanning resumes there.
        wchar_t *numberEnd = nullptr;
        tokens.push_back(wcstod(pos, &numberEnd));
        pos = numberEnd;
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
// Spaces around numbers and operators must not produce tokens.
TEST_METHOD(Should_skip_spaces) {
    const Tokens actual = Lexer::Tokenize(L" 1 + 12.34 ");
    AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, actual);
}
(unconditional → if) , .
// Scan loop: numbers are parsed with wcstod, '+' becomes an operator token,
// and any other character (a space, for instance) is skipped.
while(*current != L'\0') {
    const wchar_t symbol = *current;
    if(iswdigit(symbol)) {
        wchar_t *numberEnd = nullptr;
        result.push_back(wcstod(current, &numberEnd));
        current = numberEnd;
        continue;
    }
    if(symbol == static_cast<wchar_t>(Operator::Plus)) {
        result.push_back(static_cast<Operator>(symbol));
    }
    ++current;
}
. . Detail
. Tokenize
.
// Public entry point: hands the scanning over to Detail::Tokenizer and
// returns a copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    const Tokens &collected = scanner.Result();
    return collected;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// All six operators and the numbers between them come back in source order.
TEST_METHOD(Should_tokenize_complex_experssion) {
    const Tokens actual = Lexer::Tokenize(L"1+2*3/(4-5)");
    AssertRange::AreEqual({
        Token(1), Token(Operator::Plus),
        Token(2), Token(Operator::Mul),
        Token(3), Token(Operator::Div),
        Token(Operator::LParen),
        Token(4), Token(Operator::Minus), Token(5),
        Token(Operator::RParen)
    }, actual);
}
Operator
, .
// Operators known to the lexer. Each enumerator's value is the character
// that spells the operator in an expression.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators known to the lexer. Each enumerator's value is the character
// that spells the operator in an expression.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the operator as a one-character string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Tells which kind of value a Token holds.
enum class TokenType {
    Operator,
    Number
};

// Returns the enumerator's name; throws std::out_of_range for any other value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// A lexical token: either an operator or a floating-point number.
class Token {
public:
    // The constructors are implicit on purpose: the tokenizer pushes raw
    // Operator and double values straight into a Tokens vector.
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Throws std::logic_error when the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error when the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Formats the token's payload; throws std::out_of_range for an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

// Single-pass scanner over a null-terminated wide string. It collects number
// and operator tokens; every other character is skipped.
class Tokenizer {
public:
    // Stores a raw pointer into expr's buffer, so expr must stay alive and
    // unmodified while Tokenize() runs.
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Walks the whole expression and appends the tokens found to the result.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // wcstod consumed nothing although iswdigit accepted the
            // character; skip it so the scan loop always makes progress.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits expr into number and operator tokens; unknown characters are ignored.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer

} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data at this stage.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

// Initial stub of the lexer entry point: the argument is ignored and
// std::exception is always thrown.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators the lexer recognises. The value of each enumerator is the
// character that spells it, so a character can be cast to Operator directly.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// One-character string holding the operator's symbol.
inline std::wstring ToString(const Operator &op) {
    return std::wstring(1, static_cast<wchar_t>(op));
}

enum class TokenType {
    Operator,
    Number
};

// Name of the token type; throws std::out_of_range for a value outside the enum.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

// A lexer token that holds either an operator or a number. m_type records
// which member of the anonymous union is meaningful.
class Token {
public:
    // Both constructors are implicit: the tokenizer pushes raw Operator and
    // double values straight into a Tokens vector.
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Returns the stored operator; throws std::logic_error for a number token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number; throws std::logic_error for an operator token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    // Numbers are compared exactly.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token: std::to_wstring for numbers, the operator's symbol
// for operators.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Walks a null-terminated wide string and collects the tokens found in it.
// Only a pointer into the caller's string is kept, so that string has to
// outlive the call to Tokenize().
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the whole expression. Characters that are neither the start of
    // a number nor an operator are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Lets wcstod parse the number and continues right after the characters
    // it consumed.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul,
                     Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) {
            return *m_current == static_cast<wchar_t>(o);
        });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits expr into number and operator tokens and returns them in order.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data at this stage.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

// Initial stub of the lexer entry point: the argument is ignored and
// std::exception is always thrown.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators the lexer recognises. The value of each enumerator is the
// character that spells it, so a character can be cast to Operator directly.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// One-character string holding the operator's symbol.
inline std::wstring ToString(const Operator &op) {
    return std::wstring(1, static_cast<wchar_t>(op));
}

enum class TokenType {
    Operator,
    Number
};

// Name of the token type; throws std::out_of_range for a value outside the enum.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

// A lexer token that holds either an operator or a number. m_type records
// which member of the anonymous union is meaningful.
class Token {
public:
    // Both constructors are implicit: the tokenizer pushes raw Operator and
    // double values straight into a Tokens vector.
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Returns the stored operator; throws std::logic_error for a number token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number; throws std::logic_error for an operator token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    // Numbers are compared exactly.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token: std::to_wstring for numbers, the operator's symbol
// for operators.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Walks a null-terminated wide string and collects the tokens found in it.
// Only a pointer into the caller's string is kept, so that string has to
// outlive the call to Tokenize().
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the whole expression. Characters that are neither the start of
    // a number nor an operator are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Lets wcstod parse the number and continues right after the characters
    // it consumed.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul,
                     Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) {
            return *m_current == static_cast<wchar_t>(o);
        });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits expr into number and operator tokens and returns them in order.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data at this stage.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

// Initial stub of the lexer entry point: the argument is ignored and
// std::exception is always thrown.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
// Operators known to the lexer. The underlying type is wchar_t and each
// enumerator's value is the character that spells the operator.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators known to the lexer. Each enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character string that spells the operator.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Tells which kind of value a Token holds.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator name; throws std::out_of_range for any other value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

/// A lexer token: either an Operator or a double, discriminated by Type().
class Token {
public:
    // The constructors are intentionally implicit: the tokenizer and the tests
    // pass bare Operator / double values where a Token is expected.
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// Returns the stored operator; throws std::logic_error for a number token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// Returns the stored number; throws std::logic_error for an operator token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Formats the token's payload; throws std::out_of_range for an unknown token type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

/// Walks a null-terminated expression and collects its tokens.
/// Only a pointer into the string passed to the constructor is stored, so that
/// string must stay alive and unmodified while Tokenize() runs.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Scans to the end of the expression. Characters that neither start a
    /// number nor spell an operator (spaces, for example) are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // wcstod converted nothing: skip the character instead of emitting
            // a token, so the scanning loop always makes progress.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits expr into number and operator tokens, ignoring any other characters.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type with no state yet.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Stub: throws std::exception unconditionally and never returns a token list.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
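({} → nil) no code at all → code that employs nil. (nil → constant) nil → a constant. (constant → constant+) a simple constant → a more complex constant. (constant → scalar) replacing a constant with a variable or an argument. (statement → statements) adding more unconditional statements (break, continue, return, etc.). (unconditional → if) splitting the execution path. (scalar → array) a variable/argument → an array. (array → container) an array → a more complex container. (statement → recursion) a statement → recursion. (if → while) a conditional → a loop. (expression → function) replacing an expression with a function or algorithm. (variable → assignment) replacing the value of a variable.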
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators known to the lexer. Each enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character string that spells the operator.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Tells which kind of value a Token holds.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator name; throws std::out_of_range for any other value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

/// A lexer token: either an Operator or a double, discriminated by Type().
class Token {
public:
    // The constructors are intentionally implicit: the tokenizer and the tests
    // pass bare Operator / double values where a Token is expected.
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// Returns the stored operator; throws std::logic_error for a number token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// Returns the stored number; throws std::logic_error for an operator token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Formats the token's payload; throws std::out_of_range for an unknown token type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

/// Walks a null-terminated expression and collects its tokens.
/// Only a pointer into the string passed to the constructor is stored, so that
/// string must stay alive and unmodified while Tokenize() runs.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Scans to the end of the expression. Characters that neither start a
    /// number nor spell an operator (spaces, for example) are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // wcstod converted nothing: skip the character instead of emitting
            // a token, so the scanning loop always makes progress.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits expr into number and operator tokens, ignoring any other characters.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type with no state yet.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Stub: throws std::exception unconditionally and never returns a token list.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
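({} → nil) no code at all → code that employs nil. (nil → constant) nil → a constant. (constant → constant+) a simple constant → a more complex constant. (constant → scalar) replacing a constant with a variable or an argument. (statement → statements) adding more unconditional statements (break, continue, return, etc.). (unconditional → if) splitting the execution path. (scalar → array) a variable/argument → an array. (array → container) an array → a more complex container. (statement → recursion) a statement → recursion. (if → while) a conditional → a loop. (expression → function) replacing an expression with a function or algorithm. (variable → assignment) replacing the value of a variable.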
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <wchar.h>   // wcstod
#include <wctype.h>  // iswdigit

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>

namespace Interpreter {

// Operators the lexer recognizes; each enumerator's value is the wide
// character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// One-character string holding the operator's character.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

enum class TokenType {
    Operator,
    Number
};

// Name of the token type; throws std::out_of_range for a value outside the enum.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// A lexer token: either an operator or a number. m_type records which member
// of the anonymous union is in use.
class Token {
public:
    // Deliberately implicit: the tokenizer pushes raw operators and doubles.
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Throws std::logic_error when the token is not an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error when the token is not a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when both the type and the stored value match.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token: std::to_wstring of the number, or the operator character.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Walks a null-terminated wide-character expression and collects its tokens.
class Tokenizer {
public:
    // Stores a pointer into expr's buffer; expr has to outlive the tokenizer.
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Numbers and operators are recorded; any other character is stepped over.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // wcstod parses the number and reports where it stopped.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits expr into number and operator tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub lexer entry point: it always throws std::exception, whatever
// expression is passed in.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
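({} → nil) no code at all → code that employs nil. (nil → constant) nil is replaced by a constant. (constant → constant+) a simple constant becomes a more complex one. (constant → scalar) a constant is replaced by a variable or an argument. (statement → statements) more unconditional statements are added (break, continue, return, etc.). (unconditional → if) the execution path is split. (scalar → array) a variable/argument becomes an array. (array → container) an array becomes a more general container. (statement → recursion) a statement is replaced by a recursive call. (if → while) a condition is replaced by a loop. (expression → function) an expression is replaced by a function or algorithm. (variable → assignment) the value of a variable is replaced.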
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// The only operator so far; the enumerator's value is its wide character.
enum class Operator : wchar_t {
    Plus = L'+',
};

// At this stage a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
// Operators known to the lexer. Each enumerator's value is the wide
// character that spells the operator in an expression.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <wchar.h>   // wcstod
#include <wctype.h>  // iswdigit

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>

namespace Interpreter {

// Operators the lexer recognizes; each enumerator's value is the wide
// character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// One-character string holding the operator's character.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

enum class TokenType {
    Operator,
    Number
};

// Name of the token type; throws std::out_of_range for a value outside the enum.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// A lexer token: either an operator or a number. m_type records which member
// of the anonymous union is in use.
class Token {
public:
    // Deliberately implicit: the tokenizer pushes raw operators and doubles.
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Throws std::logic_error when the token is not an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error when the token is not a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when both the type and the stored value match.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token: std::to_wstring of the number, or the operator character.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Walks a null-terminated wide-character expression and collects its tokens.
class Tokenizer {
public:
    // Stores a pointer into expr's buffer; expr has to outlive the tokenizer.
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Numbers and operators are recorded; any other character is stepped over.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // wcstod parses the number and reports where it stopped.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits expr into number and operator tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub lexer entry point: it always throws std::exception, whatever
// expression is passed in.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
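({} → nil) no code at all → code that employs nil. (nil → constant) nil is replaced by a constant. (constant → constant+) a simple constant becomes a more complex one. (constant → scalar) a constant is replaced by a variable or an argument. (statement → statements) more unconditional statements are added (break, continue, return, etc.). (unconditional → if) the execution path is split. (scalar → array) a variable/argument becomes an array. (array → container) an array becomes a more general container. (statement → recursion) a statement is replaced by a recursive call. (if → while) a condition is replaced by a loop. (expression → function) an expression is replaced by a function or algorithm. (variable → assignment) the value of a variable is replaced.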
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// The only operator so far; the enumerator's value is its wide character.
enum class Operator : wchar_t {
    Plus = L'+',
};

// At this stage a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
// Operators known to the lexer. Each enumerator's value is the wide
// character that spells the operator in an expression.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <wchar.h>   // wcstod
#include <wctype.h>  // iswdigit

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>

namespace Interpreter {

// Operators the lexer recognizes; each enumerator's value is the wide
// character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// One-character string holding the operator's character.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

enum class TokenType {
    Operator,
    Number
};

// Name of the token type; throws std::out_of_range for a value outside the enum.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// A lexer token: either an operator or a number. m_type records which member
// of the anonymous union is in use.
class Token {
public:
    // Deliberately implicit: the tokenizer pushes raw operators and doubles.
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Throws std::logic_error when the token is not an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error when the token is not a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when both the type and the stored value match.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token: std::to_wstring of the number, or the operator character.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Walks a null-terminated wide-character expression and collects its tokens.
class Tokenizer {
public:
    // Stores a pointer into expr's buffer; expr has to outlive the tokenizer.
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Numbers and operators are recorded; any other character is stepped over.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // wcstod parses the number and reports where it stopped.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits expr into number and operator tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type: carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

// Splits an expression into tokens.
// Not implemented at this step: every call throws std::exception, so the
// first test compiles but fails.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
// Simplest implementation that passes the first test: whatever the
// expression is, the resulting token list is empty.
inline Tokens Tokenize(std::string expr) {
    Tokens noTokens;
    return noTokens;
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone "+" must yield exactly one token: the Plus operator.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// Operators known to the lexer; an enumerator's value is the character
// that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
};

// For now a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
// Renders a token as a one-character wide string made of the token's
// character code.
inline std::wstring ToString(const Token &token) {
    return std::wstring(1, static_cast<wchar_t>(token));
}
, , . , (unconditional → if).
// Empty input gives no tokens; otherwise the first character alone is
// turned into a single operator token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    if(!expr.empty()) {
        result.push_back(static_cast<Operator>(expr[0]));
    }
    return result;
}
. . . …
.
// A single digit must yield one number token with that digit's value.
TEST_METHOD(Should_tokenize_single_digit) {
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double must report the Number type.
TEST_METHOD(Should_get_type_for_number_token) {
    const Token numberToken(1.2);
    Assert::AreEqual(TokenType::Number, numberToken.Type());
}
. (constant → scalar) .
// Token that remembers only which kind it is; the constructor argument
// selects the kind and its value is not stored yet.
class Token {
public:
    Token(Operator)
        : m_type(TokenType::Operator) {
    }

    Token(double)
        : m_type(TokenType::Number) {
    }

    // Kind chosen at construction.
    TokenType Type() const {
        return m_type;
    }

private:
    TokenType m_type;
};
… . . . .
.
// An operator token must convert back to the operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    const Token plusToken(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plusToken);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token must convert back to the value it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) {
    const Token numberToken(1.23);
    Assert::AreEqual<double>(1.23, numberToken);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
// A multi-digit number with a fractional part must yield one number token.
TEST_METHOD(Should_tokenize_floating_point_number) {
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
// Produces at most one token from the start of the expression: nothing for
// empty input, a number parsed by _wtof when the first character is a
// digit, otherwise an operator made from the first character.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *current = expr.c_str();
    const wchar_t first = *current;
    if(first == L'\0') {
        return{};
    }
    if(!iswdigit(first)) {
        return{ static_cast<Operator>(first) };
    }
    return{ _wtof(current) };
}
. .
// An operator immediately followed by a number must yield two tokens.
TEST_METHOD(Should_tokenize_plus_and_number) {
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
// Same behaviour as the previous step, restructured around a result
// vector: at most one token is appended, taken from the start of the
// expression.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    const wchar_t *current = expr.c_str();
    if(*current != L'\0') {
        if(iswdigit(*current)) {
            result.push_back(_wtof(current));
        }
        else {
            result.push_back(static_cast<Operator>(*current));
        }
    }
    return result;
}
: (if → while). , .
// Scans the whole expression. A run starting with a digit is parsed by
// wcstod, which also reports where the number ends; every other character
// becomes an operator token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    for(const wchar_t *current = expr.c_str(); *current != L'\0'; ) {
        if(!iswdigit(*current)) {
            result.push_back(static_cast<Operator>(*current));
            ++current;
            continue;
        }
        wchar_t *numberEnd = nullptr;
        result.push_back(wcstod(current, &numberEnd));
        current = numberEnd;  // resume right after the parsed number
    }
    return result;
}
wcstod
, _wtof
, . , . , .
. .
.
// Spaces around and between tokens must not produce tokens of their own.
TEST_METHOD(Should_skip_spaces) {
    const Tokens actual = Lexer::Tokenize(L" 1 + 12.34 ");
    AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, actual);
}
(unconditional → if) , .
// Scan until the terminating null character.
while(*current != L'\0') {
    if(iswdigit(*current)) {
        wchar_t *numberEnd = nullptr;
        result.push_back(wcstod(current, &numberEnd));
        current = numberEnd;
        continue;
    }
    if(*current == static_cast<wchar_t>(Operator::Plus)) {
        result.push_back(static_cast<Operator>(*current));
    }
    // Whether it was the plus sign or an unrecognised character, it is
    // consumed here; unrecognised characters produce no token.
    ++current;
}
. . Detail
. Tokenize
.
// Entry point of the lexer: runs a Detail::Tokenizer over the expression
// and returns a copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    return scanner.Result();
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// Every supported operator, including parentheses, mixed with numbers.
TEST_METHOD(Should_tokenize_complex_experssion) {
    const Tokens actual = Lexer::Tokenize(L"1+2*3/(4-5)");
    AssertRange::AreEqual({
        Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul),
        Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4),
        Token(Operator::Minus), Token(5), Token(Operator::RParen)
    }, actual);
}
Operator
, .
// Operators recognised by the lexer; each enumerator's value is the
// character that spells it, so a character can be cast to an Operator.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators recognised by the lexer; each enumerator's value is the
// character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// One-character string holding the operator's symbol.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Kinds of token the lexer can produce.
enum class TokenType {
    Operator,
    Number
};

// Name of the token type. Throws std::out_of_range for a value that is
// not one of the enumerators.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// Tagged value produced by the lexer: holds either an operator or a
// number, with m_type recording which union member was initialised.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Returns the stored operator.
    // Throws std::logic_error when the token is not an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number.
    // Throws std::logic_error when the token is not a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same stored
    // value; numbers are compared exactly.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token: std::to_wstring for numbers, the operator's own
// ToString for operators. Throws std::out_of_range for an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

// Walks a wide string character by character and collects tokens.
// Only a pointer into the string given to the constructor is kept, so
// that string has to stay alive while the tokenizer is in use.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the rest of the expression: numbers and operators become
    // tokens, any other character is skipped.
    void Tokenize() {
        while(!EndOfExperssion()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    // Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExperssion() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses a number at the current position and moves past it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // Defensive: if iswdigit accepted a character that wcstod
            // cannot parse, nothing was consumed. Skip it like any other
            // unrecognised character so the scan loop always progresses.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    // True when the current character is one of the operator symbols.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits an expression into number and operator tokens; characters that
// are neither (spaces, for example) are skipped.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type: carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

// Splits an expression into tokens.
// Not implemented at this step: every call throws std::exception, so the
// first test compiles but fails.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators recognised by the lexer; each enumerator's value is the
// character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// One-character string holding the operator's symbol.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Kinds of token the lexer can produce.
enum class TokenType {
    Operator,
    Number
};

// Name of the token type. Throws std::out_of_range for a value that is
// not one of the enumerators.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// Tagged value produced by the lexer: holds either an operator or a
// number, with m_type recording which union member was initialised.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Returns the stored operator.
    // Throws std::logic_error when the token is not an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number.
    // Throws std::logic_error when the token is not a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same stored
    // value; numbers are compared exactly.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token: std::to_wstring for numbers, the operator's own
// ToString for operators. Throws std::out_of_range for an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

// Walks a wide string character by character and collects tokens.
// Only a pointer into the string given to the constructor is kept, so
// that string has to stay alive while the tokenizer is in use.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the rest of the expression: numbers and operators become
    // tokens, any other character is skipped.
    void Tokenize() {
        while(!EndOfExperssion()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    // Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExperssion() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses a number at the current position and moves past it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // Defensive: if iswdigit accepted a character that wcstod
            // cannot parse, nothing was consumed. Skip it like any other
            // unrecognised character so the scan loop always progresses.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    // True when the current character is one of the operator symbols.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits an expression into number and operator tokens; characters that
// are neither (spaces, for example) are skipped.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type: carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

// Splits an expression into tokens.
// Not implemented at this step: every call throws std::exception, so the
// first test compiles but fails.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
// Operators and parentheses; each enumerator's value is the wide character that spells it.
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators and parentheses understood by the lexer. Each enumerator's value
// is the wide character that spells it in an expression.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the one-character spelling of op.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Discriminates the two kinds of token.
enum class TokenType {
    Operator,
    Number
};

// Returns the enumerator's name; throws std::out_of_range for any other value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

// Either an operator or a number; m_type records which union member is active.
// Constructors and conversions are left implicit because the tokenizer pushes
// raw doubles and Operator values straight into a Tokens vector.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Throws std::logic_error unless this token holds an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error unless this token holds a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Equal when both tokens have the same type and the same payload;
    // numbers are compared with exact ==.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token: std::to_wstring of a number, or the operator's symbol.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Single-pass scanner. It stores only a pointer into the expression's
// characters, so the string given to the constructor must stay alive while
// Tokenize() runs.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the whole expression: a digit starts a number, an operator
    // symbol yields an operator token, any other character is skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // wcstod reports where the number ended; scanning resumes there.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits expr into number and operator tokens, skipping every other character.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data at this stage.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

// Not implemented at this stage: every call throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
// Ignores its argument and always returns an empty token list.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone '+' yields exactly one Plus operator token.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// The only operator defined at this stage; its value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
};

// A token is just an alias for Operator here.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
// Renders a token as a one-character string holding its wchar_t value.
inline std::wstring ToString(const Token &token) {
    return std::wstring(1, static_cast<wchar_t>(token));
}
, , . , (unconditional β if).
// Empty input gives no tokens; otherwise the first character becomes the single operator token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    if(!expr.empty()) {
        tokens.push_back(static_cast<Operator>(expr[0]));
    }
    return tokens;
}
. . . β¦
.
// A single digit yields one number token with that value.
TEST_METHOD(Should_tokenize_single_digit) {
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double reports TokenType::Number.
TEST_METHOD(Should_get_type_for_number_token) {
    const Token numToken(1.2);
    Assert::AreEqual(TokenType::Number, numToken.Type());
}
. (constant β scalar) .
// Token remembers only which kind it was built as; the payload is not stored yet.
class Token {
public:
    Token(Operator) : m_type(TokenType::Operator) {}
    Token(double) : m_type(TokenType::Number) {}

    TokenType Type() const {
        return m_type;
    }

private:
    TokenType m_type;
};
β¦ . . . .
.
// An operator token converts back to the operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    const Token plusToken(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plusToken);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token converts back to the double it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) {
    const Token numberToken(1.23);
    Assert::AreEqual<double>(1.23, numberToken);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
// A multi-digit number with a fractional part yields a single number token.
TEST_METHOD(Should_tokenize_floating_point_number) {
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
// Produces at most one token from the start of expr: nothing for an empty
// string, a number (via _wtof) when it starts with a digit, else an operator.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *const first = expr.c_str();
    if(*first == L'\0') {
        return{};
    }
    if(!iswdigit(*first)) {
        return{ static_cast<Operator>(*first) };
    }
    return{ _wtof(first) };
}
. .
// An operator immediately followed by a number yields two tokens in order.
TEST_METHOD(Should_tokenize_plus_and_number) {
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
// Collects at most one token into a result vector: a number (via _wtof) when
// expr starts with a digit, otherwise an operator; empty input adds nothing.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    const wchar_t *const first = expr.c_str();
    if(*first != L'\0') {
        if(iswdigit(*first)) {
            tokens.push_back(_wtof(first));
        }
        else {
            tokens.push_back(static_cast<Operator>(*first));
        }
    }
    return tokens;
}
: (if β while). , .
// Scans the expression left to right. A character that is a digit starts a
// number, parsed by wcstod; any other character becomes an operator token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    for(const wchar_t *cursor = expr.c_str(); *cursor != L'\0'; ) {
        if(!iswdigit(*cursor)) {
            tokens.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        wchar_t *numberEnd = nullptr;
        tokens.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;  // resume right after the parsed number
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
// Spaces around numbers and operators must not produce tokens.
TEST_METHOD(Should_skip_spaces) {
    const Tokens actual = Lexer::Tokenize(L" 1 + 12.34 ");
    AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, actual);
}
(unconditional β if) , .
// Numbers and '+' produce tokens; every other character is skipped.
while(*current) {
    if(iswdigit(*current)) {
        wchar_t *numberEnd = nullptr;
        result.push_back(wcstod(current, &numberEnd));
        current = numberEnd;
        continue;
    }
    if(*current == static_cast<wchar_t>(Operator::Plus)) {
        result.push_back(static_cast<Operator>(*current));
    }
    ++current;
}
. . Detail
. Tokenize
.
// Runs a Detail::Tokenizer over expr and returns a copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    return scanner.Result();
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// All six operator symbols and integer literals are tokenized in order.
TEST_METHOD(Should_tokenize_complex_experssion) {
    const Tokens actual = Lexer::Tokenize(L"1+2*3/(4-5)");
    AssertRange::AreEqual({
        Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div),
        Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen)
    }, actual);
}
Operator
, .
// Operators and parentheses; each enumerator's value is the wide character that spells it.
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators and parentheses understood by the lexer. Each enumerator's value
// is the wide character that spells it in an expression.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the one-character spelling of op.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Discriminates the two kinds of token.
enum class TokenType {
    Operator,
    Number
};

// Returns the enumerator's name; throws std::out_of_range for any other value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

// Either an operator or a number; m_type records which union member is active.
// Constructors and conversions are left implicit because the tokenizer pushes
// raw doubles and Operator values straight into a Tokens vector.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Throws std::logic_error unless this token holds an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error unless this token holds a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Equal when both tokens have the same type and the same payload;
    // numbers are compared with exact ==.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token: std::to_wstring of a number, or the operator's symbol.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Single-pass scanner. It stores only a pointer into the expression's
// characters, so the string given to the constructor must stay alive while
// Tokenize() runs.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the whole expression: a digit starts a number, an operator
    // symbol yields an operator token, any other character is skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // wcstod reports where the number ended; scanning resumes there.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits expr into number and operator tokens, skipping every other character.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data at this stage.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

// Not implemented at this stage: every call throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
// Ignores its argument and always returns an empty token list.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone '+' yields exactly one Plus operator token.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// The only operator defined at this stage; its value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
};

// A token is just an alias for Operator here.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
// Renders a token as a one-character string holding its wchar_t value.
inline std::wstring ToString(const Token &token) {
    return std::wstring(1, static_cast<wchar_t>(token));
}
, , . , (unconditional β if).
// Empty input gives no tokens; otherwise the first character becomes the single operator token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    if(!expr.empty()) {
        tokens.push_back(static_cast<Operator>(expr[0]));
    }
    return tokens;
}
. . . β¦
.
// A single digit yields one number token with that value.
TEST_METHOD(Should_tokenize_single_digit) {
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double reports TokenType::Number.
TEST_METHOD(Should_get_type_for_number_token) {
    const Token numToken(1.2);
    Assert::AreEqual(TokenType::Number, numToken.Type());
}
. (constant β scalar) .
// Token remembers only which kind it was built as; the payload is not stored yet.
class Token {
public:
    Token(Operator) : m_type(TokenType::Operator) {}
    Token(double) : m_type(TokenType::Number) {}

    TokenType Type() const {
        return m_type;
    }

private:
    TokenType m_type;
};
β¦ . . . .
.
// An operator token converts back to the operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    const Token plusToken(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plusToken);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token converts back to the double it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) {
    const Token numberToken(1.23);
    Assert::AreEqual<double>(1.23, numberToken);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
// A multi-digit number with a fractional part yields a single number token.
TEST_METHOD(Should_tokenize_floating_point_number) {
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
// Produces at most one token from the start of expr: nothing for an empty
// string, a number (via _wtof) when it starts with a digit, else an operator.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *const first = expr.c_str();
    if(*first == L'\0') {
        return{};
    }
    if(!iswdigit(*first)) {
        return{ static_cast<Operator>(*first) };
    }
    return{ _wtof(first) };
}
. .
// An operator immediately followed by a number yields two tokens in order.
TEST_METHOD(Should_tokenize_plus_and_number) {
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
// Collects at most one token into a result vector: a number (via _wtof) when
// expr starts with a digit, otherwise an operator; empty input adds nothing.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    const wchar_t *const first = expr.c_str();
    if(*first != L'\0') {
        if(iswdigit(*first)) {
            tokens.push_back(_wtof(first));
        }
        else {
            tokens.push_back(static_cast<Operator>(*first));
        }
    }
    return tokens;
}
: (if β while). , .
// Scans the expression left to right. A character that is a digit starts a
// number, parsed by wcstod; any other character becomes an operator token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    for(const wchar_t *cursor = expr.c_str(); *cursor != L'\0'; ) {
        if(!iswdigit(*cursor)) {
            tokens.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        wchar_t *numberEnd = nullptr;
        tokens.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;  // resume right after the parsed number
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
// Spaces around numbers and operators must not produce tokens.
TEST_METHOD(Should_skip_spaces) {
    const Tokens actual = Lexer::Tokenize(L" 1 + 12.34 ");
    AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, actual);
}
(unconditional β if) , .
// Numbers and '+' produce tokens; every other character is skipped.
while(*current) {
    if(iswdigit(*current)) {
        wchar_t *numberEnd = nullptr;
        result.push_back(wcstod(current, &numberEnd));
        current = numberEnd;
        continue;
    }
    if(*current == static_cast<wchar_t>(Operator::Plus)) {
        result.push_back(static_cast<Operator>(*current));
    }
    ++current;
}
. . Detail
. Tokenize
.
// Runs a Detail::Tokenizer over expr and returns a copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    return scanner.Result();
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// All six operator symbols and integer literals are tokenized in order.
TEST_METHOD(Should_tokenize_complex_experssion) {
    const Tokens actual = Lexer::Tokenize(L"1+2*3/(4-5)");
    AssertRange::AreEqual({
        Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div),
        Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen)
    }, actual);
}
Operator
, .
// Operators and parentheses; each enumerator's value is the wide character that spells it.
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <vector>
#include <wchar.h>
#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <wctype.h>

namespace Interpreter {

// Operators and parentheses known to the lexer. Each enumerator's value is
// the wide character that spells it in an expression.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the one-character string that spells the operator.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Discriminates what a Token holds.
enum class TokenType {
    Operator,
    Number
};

// Returns the enumerator name; throws std::out_of_range for a value that is
// not a declared enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// A tagged value holding either an Operator or a number.
// The constructors are deliberately implicit: the tokenizer and the tests
// push raw doubles and Operators where a Token is expected.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Returns the stored operator; throws std::logic_error for a number token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number; throws std::logic_error for an operator token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    // Numbers are compared with exact floating-point equality.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Formats a token: numbers via std::to_wstring, operators as their character.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

// Walks a null-terminated wide string and collects tokens into m_result.
// Only the pointer returned by expr.c_str() is stored, so the string passed
// to the constructor has to stay alive while the tokenizer is used.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the whole expression: numbers and operators become tokens,
    // every other character (e.g. a space) is skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses a number with wcstod and continues after its last character.
    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // wcstod consumed nothing. This can only happen if the runtime's
            // iswdigit accepts a character wcstod cannot parse; skip it like
            // any other unknown character so the scan always makes progress.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    // True when the current character equals the value of any known operator.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    // Stores the current character as an operator token and steps past it.
    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits expr into number and operator tokens, ignoring all other characters.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub: unconditionally throws std::exception, so any test that calls it
// fails until a real implementation replaces the body.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
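({} → nil) from no code at all to code that employs nil. (nil → constant) replacing nil with a constant. (constant → constant+) replacing a simple constant with a more complex one (for example, a one-character string with a longer string). (constant → scalar) replacing a constant with a variable or an argument. (statement → statements) adding more unconditional statements (break, continue, return and the like). (unconditional → if) splitting the execution path with a condition. (scalar → array) replacing a variable/argument with an array. (array → container) replacing an array with a more complex container. (statement → recursion) replacing a statement with recursion. (if → while) replacing a condition with a loop. (expression → function) replacing an expression with a function or an algorithm. (variable → assignment) replacing the value of a variable.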
.
// Simplest implementation that satisfies the first test: always returns an
// empty token list, whatever the expression.
inline Tokens Tokenize(std::string expr)
{
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone plus sign must yield exactly one Operator::Plus token.
TEST_METHOD(Should_tokenize_single_plus_operator)
{
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// The only operator known so far; its value is the character that spells it.
enum class Operator : wchar_t
{
    Plus = L'+'
};

// For now a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
// Renders a token as the single character it was created from.
inline std::wstring ToString(const Token &token)
{
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
// Returns no tokens for an empty expression; otherwise a single token made
// from the first character, interpreted as an operator.
inline Tokens Tokenize(std::wstring expr)
{
    Tokens tokens;
    if(!expr.empty())
    {
        tokens.push_back(static_cast<Operator>(expr[0]));
    }
    return tokens;
}
. . . β¦
.
// A single digit must yield exactly one number token with that value.
TEST_METHOD(Should_tokenize_single_digit)
{
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double must report TokenType::Number.
TEST_METHOD(Should_get_type_for_number_token)
{
    const Token numberToken(1.2);
    const TokenType actualType = numberToken.Type();
    Assert::AreEqual(TokenType::Number, actualType);
}
. (constant → scalar) .
// Token that remembers only which kind of value it was built from; the value
// itself is not stored yet.
class Token
{
public:
    // Implicit on purpose: an Operator or a double converts to a Token.
    Token(Operator) : m_type(TokenType::Operator) {}
    Token(double) : m_type(TokenType::Number) {}

    TokenType Type() const
    {
        return m_type;
    }

private:
    TokenType m_type;
};
β¦ . . . .
.
// An operator token must convert back to the Operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token)
{
    const Token plusToken(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plusToken);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token must convert back to the double it was built from.
TEST_METHOD(Should_get_number_value_from_number_token)
{
    const Token numberToken(1.23);
    Assert::AreEqual<double>(1.23, numberToken);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
// A multi-digit literal with a fractional part must yield one number token.
TEST_METHOD(Should_tokenize_floating_point_number)
{
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
// Produces at most one token: nothing for an empty expression, a number
// (parsed by _wtof) when the first character is a digit, otherwise the first
// character as an operator.
inline Tokens Tokenize(std::wstring expr)
{
    const wchar_t *const first = expr.c_str();
    if(*first == L'\0')
    {
        return{};
    }
    if(!iswdigit(*first))
    {
        return{ static_cast<Operator>(*first) };
    }
    return{ _wtof(first) };
}
. .
// An operator directly followed by a number must yield two tokens, in order.
TEST_METHOD(Should_tokenize_plus_and_number)
{
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
// Same single-token behaviour as before, restructured around an accumulated
// result list so the branch can later be turned into a loop.
inline Tokens Tokenize(std::wstring expr)
{
    Tokens tokens;
    const wchar_t *cursor = expr.c_str();
    if(*cursor != L'\0')
    {
        if(iswdigit(*cursor))
            tokens.push_back(_wtof(cursor));
        else
            tokens.push_back(static_cast<Operator>(*cursor));
    }
    return tokens;
}
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
Interpreter.h #pragma once; #include <vector> #include <wchar.h> #include <algorithm> namespace Interpreter { enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', }; inline std::wstring ToString(const Operator &op) { return{ static_cast<wchar_t>(op) }; } enum class TokenType { Operator, Number }; inline std::wstring ToString(const TokenType &type) { switch(type) { case TokenType::Operator: return L"Operator"; case TokenType::Number: return L"Number"; default: throw std::out_of_range("TokenType"); } } class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } friend inline bool operator==(const Token &left, const Token &right) { if(left.m_type == right.m_type) { switch(left.m_type) { case Interpreter::TokenType::Operator: return left.m_operator == right.m_operator; case Interpreter::TokenType::Number: return left.m_number == right.m_number; default: throw std::out_of_range("TokenType"); } } return false; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: throw std::out_of_range("TokenType"); } } typedef std::vector<Token> Tokens; namespace Lexer { namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { 
ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); } } // namespace Lexer } // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub: unconditionally throws std::exception, so any test that calls it
// fails until a real implementation replaces the body.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} β nil) , . (nil β constant) . (constant β constant+) ( , ). (constant β scalar) , . (statement β statements) (break, continue, return ). (unconditional β if) . (scalar β array) / . (array β container) . (statement β recursion) . (if β while) . (expression β function) . (variable β assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
Interpreter.h #pragma once; #include <vector> #include <wchar.h> #include <algorithm> namespace Interpreter { enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', }; inline std::wstring ToString(const Operator &op) { return{ static_cast<wchar_t>(op) }; } enum class TokenType { Operator, Number }; inline std::wstring ToString(const TokenType &type) { switch(type) { case TokenType::Operator: return L"Operator"; case TokenType::Number: return L"Number"; default: throw std::out_of_range("TokenType"); } } class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } friend inline bool operator==(const Token &left, const Token &right) { if(left.m_type == right.m_type) { switch(left.m_type) { case Interpreter::TokenType::Operator: return left.m_operator == right.m_operator; case Interpreter::TokenType::Number: return left.m_number == right.m_number; default: throw std::out_of_range("TokenType"); } } return false; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: throw std::out_of_range("TokenType"); } } typedef std::vector<Token> Tokens; namespace Lexer { namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { 
ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); } } // namespace Lexer } // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data at this step.
struct Token {};

// Sequence of tokens returned by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

// Splits an expression into tokens.
// Stub implementation: the argument is ignored and std::exception is
// always thrown.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// Operators known to the lexer; an enumerator's value is the character that
// spells the operator.
enum class Operator : wchar_t
{
    Plus = L'+'
};

// At this step a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . …
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
… . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
// Walks the expression from left to right. A run that starts with a digit is
// parsed by wcstod into a number token; any other character is stored as an
// operator token with that character's code.
inline Tokens Tokenize(std::wstring expr)
{
    Tokens tokens;
    for(const wchar_t *cursor = expr.c_str(); *cursor != L'\0'; )
    {
        if(!iswdigit(*cursor))
        {
            tokens.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }

        // wcstod reports through numberEnd where the parsed number stops.
        wchar_t *numberEnd = nullptr;
        tokens.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
// Operators known to the lexer; an enumerator's value is the character that
// spells the operator in an expression.
enum class Operator : wchar_t
{
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')'
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators known to the lexer; an enumerator's value is the character that
// spells the operator in an expression.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns a one-character string holding the operator's character.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Tells which kind of value a Token holds.
enum class TokenType {
    Operator,
    Number
};

// Returns the enumerator's name; throws std::out_of_range for any value
// outside the enumeration.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

// Holds either an operator or a number; m_type records which union member
// is active. The constructors and conversion operators are deliberately
// implicit: the tests build tokens from bare values and compare them with
// bare values.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Returns the stored operator; throws std::logic_error for a number token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number; throws std::logic_error for an operator token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same stored value.
    // Numbers are compared exactly with ==.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type != right.m_type)
            return false;
        switch(left.m_type) {
        case TokenType::Operator:
            return left.m_operator == right.m_operator;
        case TokenType::Number:
            return left.m_number == right.m_number;
        default:
            throw std::out_of_range("TokenType");
        }
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token: std::to_wstring of the value for numbers, the
// operator's character for operators.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Scans a wide-character expression and accumulates its tokens.
// Only a pointer into the string passed to the constructor is kept, so that
// string has to stay alive while Tokenize() runs.
class Tokenizer {
public:
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the whole expression: numbers and operators become tokens,
    // every other character (spaces included) is stepped over.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            } else if(IsOperator()) {
                ScanOperator();
            } else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses a number at the cursor and moves the cursor to where wcstod
    // stopped. Called only when the cursor is on a digit, so at least one
    // character is always consumed.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul,
                     Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(),
            [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits an expression into number and operator tokens, in source order.
// An empty expression yields an empty list.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp
// Unit tests for the lexer and the Token class declared in Interpreter.h,
// written against the Visual Studio CppUnitTest framework.
#include "stdafx.h"
#include "CppUnitTest.h"
#include "Interpreter.h"

namespace InterpreterTests {
using namespace Microsoft::VisualStudio::CppUnitTestFramework;
using namespace Interpreter;
using namespace std;

namespace AssertRange {
// Asserts that `actual` holds the same elements, in the same order, as `expect`.
//   expect - expected values; T is deduced from the braced list at the call site.
//   actual - any range usable with begin()/end().
// The lengths are compared first (message "Size differs."), then the elements
// are compared pairwise with a message naming the zero-based position.
template<class T, class ActualRange>
static void AreEqual(initializer_list<T> expect, const ActualRange &actual) {
    auto actualIter = begin(actual);
    auto expectIter = begin(expect);
    Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs.");
    // The loop stops at the end of the shorter sequence, so neither iterator runs past its range.
    for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) {
        auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter));
        // The explicit <T> makes both arguments of type T, so the actual element is converted to T.
        Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str());
    }
}
} // namespace AssertRange

// Tests for Lexer::Tokenize.
TEST_CLASS(LexerTests) {
public:
    TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) {
        Tokens tokens = Lexer::Tokenize(L"");
        Assert::IsTrue(tokens.empty());
    }
    TEST_METHOD(Should_tokenize_single_plus_operator) {
        Tokens tokens = Lexer::Tokenize(L"+");
        AssertRange::AreEqual({ Operator::Plus }, tokens);
    }
    TEST_METHOD(Should_tokenize_single_digit) {
        Tokens tokens = Lexer::Tokenize(L"1");
        AssertRange::AreEqual({ 1.0 }, tokens);
    }
    TEST_METHOD(Should_tokenize_floating_point_number) {
        Tokens tokens = Lexer::Tokenize(L"12.34");
        AssertRange::AreEqual({ 12.34 }, tokens);
    }
    TEST_METHOD(Should_tokenize_plus_and_number) {
        Tokens tokens = Lexer::Tokenize(L"+12.34");
        AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens);
    }
    TEST_METHOD(Should_skip_spaces) {
        Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 ");
        AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens);
    }
    // Covers every operator of the Operator enumeration in one expression.
    TEST_METHOD(Should_tokenize_complex_experssion) {
        Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)");
        AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4),
            Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens);
    }
};

// Tests for the Token class: reported type and conversion back to the stored value.
TEST_CLASS(TokenTests) {
public:
    TEST_METHOD(Should_get_type_for_operator_token) {
        Token opToken(Operator::Plus);
        Assert::AreEqual(TokenType::Operator, opToken.Type());
    }
    TEST_METHOD(Should_get_type_for_number_token) {
        Token numToken(1.2);
        Assert::AreEqual(TokenType::Number, numToken.Type());
    }
    TEST_METHOD(Should_get_operator_code_from_operator_token) {
        Token token(Operator::Plus);
        Assert::AreEqual<Operator>(Operator::Plus, token);
    }
    TEST_METHOD(Should_get_number_value_from_number_token) {
        Token token(1.23);
        Assert::AreEqual<double>(1.23, token);
    }
};
}
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data at this step.
struct Token {};

// Sequence of tokens returned by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

// Splits an expression into tokens.
// Stub implementation: the argument is ignored and std::exception is
// always thrown.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . …
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
… . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators known to the lexer; an enumerator's value is the character that
// spells the operator in an expression.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns a one-character string holding the operator's character.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Tells which kind of value a Token holds.
enum class TokenType {
    Operator,
    Number
};

// Returns the enumerator's name; throws std::out_of_range for any value
// outside the enumeration.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

// Holds either an operator or a number; m_type records which union member
// is active. The constructors and conversion operators are deliberately
// implicit: the tests build tokens from bare values and compare them with
// bare values.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Returns the stored operator; throws std::logic_error for a number token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number; throws std::logic_error for an operator token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same stored value.
    // Numbers are compared exactly with ==.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type != right.m_type)
            return false;
        switch(left.m_type) {
        case TokenType::Operator:
            return left.m_operator == right.m_operator;
        case TokenType::Number:
            return left.m_number == right.m_number;
        default:
            throw std::out_of_range("TokenType");
        }
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token: std::to_wstring of the value for numbers, the
// operator's character for operators.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Scans a wide-character expression and accumulates its tokens.
// Only a pointer into the string passed to the constructor is kept, so that
// string has to stay alive while Tokenize() runs.
class Tokenizer {
public:
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the whole expression: numbers and operators become tokens,
    // every other character (spaces included) is stepped over.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            } else if(IsOperator()) {
                ScanOperator();
            } else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses a number at the cursor and moves the cursor to where wcstod
    // stopped. Called only when the cursor is on a digit, so at least one
    // character is always consumed.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul,
                     Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(),
            [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits an expression into number and operator tokens, in source order.
// An empty expression yields an empty list.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp
// Unit tests for the lexer and the Token class declared in Interpreter.h,
// written against the Visual Studio CppUnitTest framework.
#include "stdafx.h"
#include "CppUnitTest.h"
#include "Interpreter.h"

namespace InterpreterTests {
using namespace Microsoft::VisualStudio::CppUnitTestFramework;
using namespace Interpreter;
using namespace std;

namespace AssertRange {
// Asserts that `actual` holds the same elements, in the same order, as `expect`.
//   expect - expected values; T is deduced from the braced list at the call site.
//   actual - any range usable with begin()/end().
// The lengths are compared first (message "Size differs."), then the elements
// are compared pairwise with a message naming the zero-based position.
template<class T, class ActualRange>
static void AreEqual(initializer_list<T> expect, const ActualRange &actual) {
    auto actualIter = begin(actual);
    auto expectIter = begin(expect);
    Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs.");
    // The loop stops at the end of the shorter sequence, so neither iterator runs past its range.
    for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) {
        auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter));
        // The explicit <T> makes both arguments of type T, so the actual element is converted to T.
        Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str());
    }
}
} // namespace AssertRange

// Tests for Lexer::Tokenize.
TEST_CLASS(LexerTests) {
public:
    TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) {
        Tokens tokens = Lexer::Tokenize(L"");
        Assert::IsTrue(tokens.empty());
    }
    TEST_METHOD(Should_tokenize_single_plus_operator) {
        Tokens tokens = Lexer::Tokenize(L"+");
        AssertRange::AreEqual({ Operator::Plus }, tokens);
    }
    TEST_METHOD(Should_tokenize_single_digit) {
        Tokens tokens = Lexer::Tokenize(L"1");
        AssertRange::AreEqual({ 1.0 }, tokens);
    }
    TEST_METHOD(Should_tokenize_floating_point_number) {
        Tokens tokens = Lexer::Tokenize(L"12.34");
        AssertRange::AreEqual({ 12.34 }, tokens);
    }
    TEST_METHOD(Should_tokenize_plus_and_number) {
        Tokens tokens = Lexer::Tokenize(L"+12.34");
        AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens);
    }
    TEST_METHOD(Should_skip_spaces) {
        Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 ");
        AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens);
    }
    // Covers every operator of the Operator enumeration in one expression.
    TEST_METHOD(Should_tokenize_complex_experssion) {
        Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)");
        AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4),
            Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens);
    }
};

// Tests for the Token class: reported type and conversion back to the stored value.
TEST_CLASS(TokenTests) {
public:
    TEST_METHOD(Should_get_type_for_operator_token) {
        Token opToken(Operator::Plus);
        Assert::AreEqual(TokenType::Operator, opToken.Type());
    }
    TEST_METHOD(Should_get_type_for_number_token) {
        Token numToken(1.2);
        Assert::AreEqual(TokenType::Number, numToken.Type());
    }
    TEST_METHOD(Should_get_operator_code_from_operator_token) {
        Token token(Operator::Plus);
        Assert::AreEqual<Operator>(Operator::Plus, token);
    }
    TEST_METHOD(Should_get_number_value_from_number_token) {
        Token token(1.23);
        Assert::AreEqual<double>(1.23, token);
    }
};
}
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data at this step.
struct Token {};

// Sequence of tokens returned by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

// Splits an expression into tokens.
// Stub implementation: the argument is ignored and std::exception is
// always thrown.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . …
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
… . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
// Operators known to the lexer. The underlying type is wchar_t and each
// enumerator's value is the wide character that denotes the operator.
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
/// True when the character under the cursor equals one of the Operator values.
bool IsOperator() const {
    switch(*m_current) {
    case static_cast<wchar_t>(Operator::Plus):
    case static_cast<wchar_t>(Operator::Minus):
    case static_cast<wchar_t>(Operator::Mul):
    case static_cast<wchar_t>(Operator::Div):
    case static_cast<wchar_t>(Operator::LParen):
    case static_cast<wchar_t>(Operator::RParen):
        return true;
    default:
        return false;
    }
}
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>

namespace Interpreter {

/// Operators known to the lexer; each value is the wide character that denotes it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the operator as a one-character string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Tells which kind of value a Token holds.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator's name.
/// @throws std::out_of_range for a value outside the enumeration.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

/// Tagged value holding either an Operator or a double; Type() says which.
class Token {
public:
    // Implicit on purpose: the tokenizer pushes raw Operator and double values.
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// @throws std::logic_error when the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error when the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens compare equal when both the type and the stored value match.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is ever read.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Text form of a token: std::to_wstring for numbers, the operator character otherwise.
/// @throws std::out_of_range when the token reports an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

/// Walks a null-terminated wide string and collects number and operator tokens.
/// Keeps a pointer into expr's buffer, so expr must outlive the Tokenizer.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Scans to the terminator; characters that are neither digits nor operators are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            } else if(IsOperator()) {
                ScanOperator();
            } else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    // Plain range check on ASCII digits: wcstod always consumes at least that
    // digit, so ScanNumber is guaranteed to advance the cursor.
    bool IsNumber() const { return *m_current >= L'0' && *m_current <= L'9'; }

    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Tokenizes expr and returns the resulting token list.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data.
struct Token {};

/// Sequence of tokens returned by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Stub: throws unconditionally and never inspects expr.
/// @throws std::exception always.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
/// Returns an empty token list regardless of expr.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
/// Operators known to the lexer; the value is the wide character itself.
enum class Operator : wchar_t {
    Plus = L'+'
};

/// At this stage a token is simply an operator.
using Token = Operator;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
/// Returns no tokens for an empty expr; otherwise a single token made from
/// the first character interpreted as an Operator.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    if(!expr.empty()) {
        tokens.push_back(static_cast<Operator>(expr.front()));
    }
    return tokens;
}
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
/// Produces at most one token from the start of expr: nothing when the text is
/// empty, a number (via _wtof) when it starts with a digit, otherwise the first
/// character as an Operator.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *text = expr.c_str();
    if(*text == L'\0') {
        return{};
    }
    const bool startsWithDigit = iswdigit(*text) != 0;
    if(!startsWithDigit) {
        return{ static_cast<Operator>(*text) };
    }
    return{ _wtof(text) };
}
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
/// Collects at most one token from the start of expr into a result list:
/// a number (via _wtof) when the text starts with a digit, otherwise the first
/// character as an Operator. Empty text yields an empty list.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    const wchar_t *head = expr.c_str();
    if(*head != L'\0') {
        const bool isDigit = iswdigit(*head) != 0;
        if(isDigit)
            tokens.push_back(_wtof(head));
        else
            tokens.push_back(static_cast<Operator>(*head));
    }
    return tokens;
}
: (if → while). , .
/// Splits expr into tokens. A position that starts with a digit is parsed as a
/// number by wcstod; every other character is stored as an Operator token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    for(const wchar_t *cursor = expr.c_str(); *cursor != L'\0'; ) {
        if(!iswdigit(*cursor)) {
            tokens.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        // wcstod reports where the number ended; scanning resumes from there.
        wchar_t *numberEnd = nullptr;
        tokens.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
/// Runs a Detail::Tokenizer over expr and returns a copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    Tokens tokens = scanner.Result();
    return tokens;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
// Operators known to the lexer. The underlying type is wchar_t and each
// enumerator's value is the wide character that denotes the operator.
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
/// True when the character under the cursor equals one of the Operator values.
bool IsOperator() const {
    switch(*m_current) {
    case static_cast<wchar_t>(Operator::Plus):
    case static_cast<wchar_t>(Operator::Minus):
    case static_cast<wchar_t>(Operator::Mul):
    case static_cast<wchar_t>(Operator::Div):
    case static_cast<wchar_t>(Operator::LParen):
    case static_cast<wchar_t>(Operator::RParen):
        return true;
    default:
        return false;
    }
}
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>

namespace Interpreter {

/// Operators known to the lexer; each value is the wide character that denotes it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the operator as a one-character string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Tells which kind of value a Token holds.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator's name.
/// @throws std::out_of_range for a value outside the enumeration.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

/// Tagged value holding either an Operator or a double; Type() says which.
class Token {
public:
    // Implicit on purpose: the tokenizer pushes raw Operator and double values.
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// @throws std::logic_error when the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error when the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens compare equal when both the type and the stored value match.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is ever read.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Text form of a token: std::to_wstring for numbers, the operator character otherwise.
/// @throws std::out_of_range when the token reports an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

/// Walks a null-terminated wide string and collects number and operator tokens.
/// Keeps a pointer into expr's buffer, so expr must outlive the Tokenizer.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Scans to the terminator; characters that are neither digits nor operators are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            } else if(IsOperator()) {
                ScanOperator();
            } else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    // Plain range check on ASCII digits: wcstod always consumes at least that
    // digit, so ScanNumber is guaranteed to advance the cursor.
    bool IsNumber() const { return *m_current >= L'0' && *m_current <= L'9'; }

    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Tokenizes expr and returns the resulting token list.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data.
struct Token {};

/// Sequence of tokens returned by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Stub: throws unconditionally and never inspects expr.
/// @throws std::exception always.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
/// Returns an empty token list regardless of expr.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
/// Operators known to the lexer; the value is the wide character itself.
enum class Operator : wchar_t {
    Plus = L'+'
};

/// At this stage a token is simply an operator.
using Token = Operator;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
/// Returns no tokens for an empty expr; otherwise a single token made from
/// the first character interpreted as an Operator.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    if(!expr.empty()) {
        tokens.push_back(static_cast<Operator>(expr.front()));
    }
    return tokens;
}
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
/// Produces at most one token from the start of expr: nothing when the text is
/// empty, a number (via _wtof) when it starts with a digit, otherwise the first
/// character as an Operator.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *text = expr.c_str();
    if(*text == L'\0') {
        return{};
    }
    const bool startsWithDigit = iswdigit(*text) != 0;
    if(!startsWithDigit) {
        return{ static_cast<Operator>(*text) };
    }
    return{ _wtof(text) };
}
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
/// Collects at most one token from the start of expr into a result list:
/// a number (via _wtof) when the text starts with a digit, otherwise the first
/// character as an Operator. Empty text yields an empty list.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    const wchar_t *head = expr.c_str();
    if(*head != L'\0') {
        const bool isDigit = iswdigit(*head) != 0;
        if(isDigit)
            tokens.push_back(_wtof(head));
        else
            tokens.push_back(static_cast<Operator>(*head));
    }
    return tokens;
}
: (if → while). , .
/// Splits expr into tokens. A position that starts with a digit is parsed as a
/// number by wcstod; every other character is stored as an Operator token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    for(const wchar_t *cursor = expr.c_str(); *cursor != L'\0'; ) {
        if(!iswdigit(*cursor)) {
            tokens.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        // wcstod reports where the number ended; scanning resumes from there.
        wchar_t *numberEnd = nullptr;
        tokens.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
/// Runs a Detail::Tokenizer over expr and returns a copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    Tokens tokens = scanner.Result();
    return tokens;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
// Operators known to the lexer. The underlying type is wchar_t and each
// enumerator's value is the wide character that denotes the operator.
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <vector>
#include <wchar.h>
#include <wctype.h>
#include <algorithm>
#include <stdexcept>
#include <string>

namespace Interpreter {

// Operators understood by the lexer; each value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// One-character string holding the operator's symbol.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

enum class TokenType {
    Operator,
    Number
};

// Name of the token type; throws std::out_of_range for a value outside the enum.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

// Tagged value that is either an Operator or a number.
// The constructors and conversion operators are deliberately implicit: the
// tokenizer pushes raw doubles and Operators straight into a Tokens vector.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Throws std::logic_error when the token holds a number.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error when the token holds an operator.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when both the type and the stored value match.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // m_type says which member is active.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token: std::to_wstring for numbers, the symbol for operators.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

// Single-pass scanner that turns an expression string into Tokens.
// It keeps a raw pointer into the string passed to the constructor, so that
// string has to stay alive and unmodified while Tokenize() runs.
class Tokenizer {
public:
    // explicit: a std::wstring must not convert silently into a scanner.
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Scans up to the terminating L'\0'. Characters that are neither a digit
    // nor an operator are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    // Tokens collected so far, in input order.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // wcstod parses the number at the scan position and reports where it stopped.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current; // current scan position inside the expression
    Tokens m_result;
};

} // namespace Detail

// Splits expr into number and operator tokens; other characters are ignored.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer

} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type with no content yet.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

// Not implemented yet: unconditionally throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer

} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
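({} → nil) From no code at all to code that returns nil. (nil → constant) From nil to a constant. (constant → constant+) From a simple constant to a more complex one. (constant → scalar) Replacing a constant with a variable or an argument. (statement → statements) Adding more unconditional statements (break, continue, return and the like). (unconditional → if) Splitting the execution path. (scalar → array) From a single variable/argument to an array. (array → container) From an array to a more complex container. (statement → recursion) Replacing a statement with recursion. (if → while) Replacing a conditional with a loop. (expression → function) Replacing an expression with a function or algorithm. (variable → assignment) Replacing the value of a variable.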
.
// Minimal implementation: ignores expr and always yields an empty token list.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone "+" must come back as exactly one Plus operator.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// The only operator so far; its value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+'
};

// For now a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
// Converts a token into a one-character string holding its wchar_t value.
inline std::wstring ToString(const Token &token) {
    return std::wstring(1, static_cast<wchar_t>(token));
}
, , . , (unconditional → if).
// Empty input gives no tokens; otherwise the first character becomes the
// single operator token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    if(!expr.empty()) {
        tokens.push_back(static_cast<Operator>(expr[0]));
    }
    return tokens;
}
. . . β¦
.
// A single digit must come back as one number.
TEST_METHOD(Should_tokenize_single_digit) {
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double must report the Number type.
TEST_METHOD(Should_get_type_for_number_token) {
    const Token numberToken(1.2);
    Assert::AreEqual(TokenType::Number, numberToken.Type());
}
. (constant → scalar) .
// Records which kind of value it was built from; the value itself is not
// stored yet.
class Token {
    TokenType m_type;

public:
    Token(Operator)
        : m_type(TokenType::Operator) {}

    Token(double)
        : m_type(TokenType::Number) {}

    TokenType Type() const {
        return m_type;
    }
};
β¦ . . . .
.
// An operator token must convert back to the operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    const Token plusToken(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plusToken);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token must convert back to the double it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) {
    const Token numberToken(1.23);
    Assert::AreEqual<double>(1.23, numberToken);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
// A number with a fractional part must come back as a single number.
TEST_METHOD(Should_tokenize_floating_point_number) {
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
// Produces at most one token: nothing for empty input, a number parsed by
// _wtof when the text starts with a digit, otherwise the first character as an
// operator.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *first = expr.c_str();
    if(*first == L'\0') {
        return{};
    }
    if(iswdigit(*first) == 0) {
        return{ static_cast<Operator>(*first) };
    }
    return{ _wtof(first) };
}
. .
// An operator directly followed by a number must give two tokens.
TEST_METHOD(Should_tokenize_plus_and_number) {
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
// Same single-token behavior as before, but the token is now collected in a
// result vector.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    const wchar_t *first = expr.c_str();
    if(*first != L'\0') {
        if(iswdigit(*first)) {
            tokens.push_back(_wtof(first));
        }
        else {
            tokens.push_back(static_cast<Operator>(*first));
        }
    }
    return tokens;
}
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <vector>
#include <wchar.h>
#include <wctype.h>
#include <algorithm>
#include <stdexcept>
#include <string>

namespace Interpreter {

// Operators understood by the lexer; each value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// One-character string holding the operator's symbol.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

enum class TokenType {
    Operator,
    Number
};

// Name of the token type; throws std::out_of_range for a value outside the enum.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

// Tagged value that is either an Operator or a number.
// The constructors and conversion operators are deliberately implicit: the
// tokenizer pushes raw doubles and Operators straight into a Tokens vector.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Throws std::logic_error when the token holds a number.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error when the token holds an operator.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when both the type and the stored value match.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // m_type says which member is active.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token: std::to_wstring for numbers, the symbol for operators.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

// Single-pass scanner that turns an expression string into Tokens.
// It keeps a raw pointer into the string passed to the constructor, so that
// string has to stay alive and unmodified while Tokenize() runs.
class Tokenizer {
public:
    // explicit: a std::wstring must not convert silently into a scanner.
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Scans up to the terminating L'\0'. Characters that are neither a digit
    // nor an operator are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    // Tokens collected so far, in input order.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // wcstod parses the number at the scan position and reports where it stopped.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current; // current scan position inside the expression
    Tokens m_result;
};

} // namespace Detail

// Splits expr into number and operator tokens; other characters are ignored.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer

} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type with no content yet.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

// Not implemented yet: unconditionally throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer

} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
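({} → nil) From no code at all to code that returns nil. (nil → constant) From nil to a constant. (constant → constant+) From a simple constant to a more complex one. (constant → scalar) Replacing a constant with a variable or an argument. (statement → statements) Adding more unconditional statements (break, continue, return and the like). (unconditional → if) Splitting the execution path. (scalar → array) From a single variable/argument to an array. (array → container) From an array to a more complex container. (statement → recursion) Replacing a statement with recursion. (if → while) Replacing a conditional with a loop. (expression → function) Replacing an expression with a function or algorithm. (variable → assignment) Replacing the value of a variable.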
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <vector>
#include <wchar.h>
#include <wctype.h>
#include <algorithm>
#include <stdexcept>
#include <string>

namespace Interpreter {

// Operators understood by the lexer; each value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// One-character string holding the operator's symbol.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

enum class TokenType {
    Operator,
    Number
};

// Name of the token type; throws std::out_of_range for a value outside the enum.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

// Tagged value that is either an Operator or a number.
// The constructors and conversion operators are deliberately implicit: the
// tokenizer pushes raw doubles and Operators straight into a Tokens vector.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Throws std::logic_error when the token holds a number.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error when the token holds an operator.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when both the type and the stored value match.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // m_type says which member is active.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token: std::to_wstring for numbers, the symbol for operators.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

// Single-pass scanner that turns an expression string into Tokens.
// It keeps a raw pointer into the string passed to the constructor, so that
// string has to stay alive and unmodified while Tokenize() runs.
class Tokenizer {
public:
    // explicit: a std::wstring must not convert silently into a scanner.
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Scans up to the terminating L'\0'. Characters that are neither a digit
    // nor an operator are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    // Tokens collected so far, in input order.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // wcstod parses the number at the scan position and reports where it stopped.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current; // current scan position inside the expression
    Tokens m_result;
};

} // namespace Detail

// Splits expr into number and operator tokens; other characters are ignored.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer

} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

/// Sequence of tokens produced by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Splits an expression into tokens.
///
/// Not implemented yet: every call throws std::exception, whatever the
/// content of @p expr.
///
/// @param expr expression text (currently unused).
/// @throws std::exception always.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
/// Tokenizes an expression. At this stage every input yields an empty
/// token list; @p expr is not inspected.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
/// Operators known to the lexer. The value of each enumerator is the wide
/// character that spells the operator.
enum class Operator : wchar_t {
    Plus = L'+'
};

/// A token is simply an operator for now.
using Token = Operator;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
/// Tokenizes an expression.
///
/// An empty expression gives an empty list; otherwise the first character
/// is converted to an Operator and returned as the only token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    if(!expr.empty()) {
        tokens.push_back(static_cast<Operator>(expr.front()));
    }
    return tokens;
}
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
/// Tokenizes an expression character by character.
///
/// A character accepted by iswdigit starts a number, which is parsed with
/// wcstod; scanning resumes right after the parsed text. Any other
/// character is converted to an Operator token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    for(const wchar_t *cursor = expr.c_str(); *cursor != L'\0'; ) {
        if(!iswdigit(*cursor)) {
            tokens.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        // wcstod reports where the number ended through afterNumber.
        wchar_t *afterNumber = nullptr;
        tokens.push_back(wcstod(cursor, &afterNumber));
        cursor = afterNumber;
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
/// Tokenizes an expression by running a Detail::Tokenizer over it and
/// returning a copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    Tokens tokens = scanner.Result();
    return tokens;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
/// Operators known to the lexer. The value of each enumerator is the wide
/// character that spells the operator.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')'
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators known to the lexer. The value of each enumerator is the wide
/// character that spells the operator, so a character can be cast directly.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Renders an operator as a one-character string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Kind of value a Token holds.
enum class TokenType {
    Operator,
    Number
};

/// Renders a token type as its name.
/// @throws std::out_of_range for a value outside the enumeration.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

/// A lexer token holding either an operator or a number.
///
/// m_type records which member of the anonymous union is active. The
/// conversion operators check it and throw std::logic_error when the token
/// is asked for a value of the other kind.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    /// Kind of value stored in this token.
    TokenType Type() const { return m_type; }

    /// @throws std::logic_error if the token is not an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error if the token is not a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same stored
    /// value; numbers are compared exactly.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Renders a token as text: the number via std::to_wstring, the operator
/// via ToString(Operator).
/// @throws std::out_of_range if the token type is not a known enumerator.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

/// Walks a null-terminated wide string and collects its tokens.
///
/// Only a pointer into the expression is stored, so the string passed to
/// the constructor must stay alive while Tokenize() runs.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Scans to the end of the expression. Characters that neither start a
    /// number nor spell an operator are skipped.
    void Tokenize() {
        while(!EndOfExperssion()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExperssion() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    /// Parses a number with wcstod and continues right after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul,
                     Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(),
            [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits an expression into number and operator tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

/// Sequence of tokens produced by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Splits an expression into tokens.
///
/// Not implemented yet: every call throws std::exception, whatever the
/// content of @p expr.
///
/// @param expr expression text (currently unused).
/// @throws std::exception always.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators known to the lexer. The value of each enumerator is the wide
/// character that spells the operator, so a character can be cast directly.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Renders an operator as a one-character string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Kind of value a Token holds.
enum class TokenType {
    Operator,
    Number
};

/// Renders a token type as its name.
/// @throws std::out_of_range for a value outside the enumeration.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

/// A lexer token holding either an operator or a number.
///
/// m_type records which member of the anonymous union is active. The
/// conversion operators check it and throw std::logic_error when the token
/// is asked for a value of the other kind.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    /// Kind of value stored in this token.
    TokenType Type() const { return m_type; }

    /// @throws std::logic_error if the token is not an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error if the token is not a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same stored
    /// value; numbers are compared exactly.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Renders a token as text: the number via std::to_wstring, the operator
/// via ToString(Operator).
/// @throws std::out_of_range if the token type is not a known enumerator.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

/// Walks a null-terminated wide string and collects its tokens.
///
/// Only a pointer into the expression is stored, so the string passed to
/// the constructor must stay alive while Tokenize() runs.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Scans to the end of the expression. Characters that neither start a
    /// number nor spell an operator are skipped.
    void Tokenize() {
        while(!EndOfExperssion()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExperssion() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    /// Parses a number with wcstod and continues right after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul,
                     Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(),
            [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits an expression into number and operator tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

/// Sequence of tokens produced by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Splits an expression into tokens.
///
/// Not implemented yet: every call throws std::exception, whatever the
/// content of @p expr.
///
/// @param expr expression text (currently unused).
/// @throws std::exception always.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
Interpreter.h #pragma once; #include <vector> #include <wchar.h> #include <algorithm> namespace Interpreter { enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', }; inline std::wstring ToString(const Operator &op) { return{ static_cast<wchar_t>(op) }; } enum class TokenType { Operator, Number }; inline std::wstring ToString(const TokenType &type) { switch(type) { case TokenType::Operator: return L"Operator"; case TokenType::Number: return L"Number"; default: throw std::out_of_range("TokenType"); } } class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } friend inline bool operator==(const Token &left, const Token &right) { if(left.m_type == right.m_type) { switch(left.m_type) { case Interpreter::TokenType::Operator: return left.m_operator == right.m_operator; case Interpreter::TokenType::Number: return left.m_number == right.m_number; default: throw std::out_of_range("TokenType"); } } return false; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: throw std::out_of_range("TokenType"); } } typedef std::vector<Token> Tokens; namespace Lexer { namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { 
ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); } } // namespace Lexer } // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type: it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

// Not implemented yet: always throws std::exception, whatever `expr` is.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
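({} → nil) from no code at all to code that employs nil. (nil → constant) from nil to a constant. (constant → constant+) from a simple constant to a more complex one. (constant → scalar) replacing a constant with a variable or an argument. (statement → statements) adding more unconditional statements (break, continue, return, etc.). (unconditional → if) splitting the execution path. (scalar → array) from a variable/argument to an array. (array → container) from an array to a more complex container. (statement → recursion) from a statement to recursion. (if → while) from a conditional to a loop. (expression → function) replacing an expression with a function or an algorithm. (variable → assignment) replacing the value of a variable.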
.
// Ignores `expr` and always returns an empty token list.
inline Tokens Tokenize(std::string expr) {
    Tokens noTokens;
    return noTokens;
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone "+" must produce exactly one token equal to Operator::Plus.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    AssertRange::AreEqual({ Operator::Plus }, Lexer::Tokenize(L"+"));
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// The only operator known at this step; its value is the wide character
// that spells it.
enum class Operator : wchar_t {
    Plus = L'+'
};

// At this step a token is simply an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
// Renders a token as a one-character wide string holding the token's
// character code.
inline std::wstring ToString(const Token &token) {
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
// Empty input yields no tokens. Otherwise only the first character is
// looked at, and it is returned as a single operator token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    if(!expr.empty()) {
        tokens.push_back(static_cast<Operator>(expr.front()));
    }
    return tokens;
}
. . . β¦
.
// The text "1" must produce exactly one token equal to 1.0.
TEST_METHOD(Should_tokenize_single_digit) {
    AssertRange::AreEqual({ 1.0 }, Lexer::Tokenize(L"1"));
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double must report TokenType::Number.
TEST_METHOD(Should_get_type_for_number_token) {
    const Token number(1.2);
    const TokenType reported = number.Type();
    Assert::AreEqual(TokenType::Number, reported);
}
. (constant → scalar) .
// A token that, at this step, only remembers which kind it is: the
// constructor argument selects the kind but its value is not stored.
class Token {
    TokenType m_type; // fixed by whichever constructor was used

public:
    Token(Operator) : m_type(TokenType::Operator) {}
    Token(double) : m_type(TokenType::Number) {}

    TokenType Type() const {
        return m_type;
    }
};
β¦ . . . .
.
// Converting an operator token to Operator must give back the code it was
// constructed with.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    const Token plus(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plus);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// Converting a number token to double must give back the value it was
// constructed with.
TEST_METHOD(Should_get_number_value_from_number_token) {
    const Token number(1.23);
    Assert::AreEqual<double>(1.23, number);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
// A leading decimal digit becomes a number token holding the digit's value;
// any other leading character is returned as an operator code.
// Wide literals and static_cast replace the narrow literals and C-style cast.
if(expr[0] >= L'0' && expr[0] <= L'9') {
    return{ static_cast<double>(expr[0] - L'0') };
}
return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
// The text "12.34" must produce exactly one token equal to 12.34.
TEST_METHOD(Should_tokenize_floating_point_number) {
    AssertRange::AreEqual({ 12.34 }, Lexer::Tokenize(L"12.34"));
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
// Produces at most one token from the start of `expr`:
//  - empty input gives an empty list;
//  - a leading digit gives one number token parsed from the start of the text;
//  - any other leading character gives one operator token.
// Uses the standard wcstod rather than the Microsoft-only _wtof.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *current = expr.c_str();
    if(!*current)
        return{};
    if(iswdigit(*current))
        return{ wcstod(current, nullptr) };
    return{ static_cast<Operator>(*current) };
}
. .
// "+12.34" must split into an operator token followed by a number token.
TEST_METHOD(Should_tokenize_plus_and_number) {
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, Lexer::Tokenize(L"+12.34"));
}
, . , . . result
, .
// Produces at most one token from the start of `expr` and collects it in
// `result`: a number when the text starts with a digit, otherwise an
// operator; empty input gives an empty list.
// Uses the standard wcstod rather than the Microsoft-only _wtof.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    const wchar_t *current = expr.c_str();
    if(!*current)
        return result;
    if(iswdigit(*current)) {
        result.push_back(wcstod(current, nullptr));
    } else {
        result.push_back(static_cast<Operator>(*current));
    }
    return result;
}
: (if → while). , .
// Splits `expr` into tokens in a single left-to-right pass.
// A character for which iswdigit() is true starts a number: wcstod parses
// it and the scan resumes where wcstod stopped. Every other character is
// emitted as an Operator token holding that character's code.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    for(const wchar_t *cursor = expr.c_str(); *cursor != L'\0'; ) {
        if(!iswdigit(*cursor)) {
            tokens.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        wchar_t *numberEnd = nullptr;
        tokens.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
// Spaces around and between tokens must not produce tokens of their own.
TEST_METHOD(Should_skip_spaces) {
    AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, Lexer::Tokenize(L" 1 + 12.34 "));
}
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
// Entry point of the lexer: runs a Detail::Tokenizer over `expr` and returns
// a copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    const Tokens &collected = scanner.Result();
    return collected;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// Every operator and both parentheses must be recognised between numbers.
TEST_METHOD(Should_tokenize_complex_experssion) {
    AssertRange::AreEqual({
        Token(1), Token(Operator::Plus),
        Token(2), Token(Operator::Mul),
        Token(3), Token(Operator::Div),
        Token(Operator::LParen),
        Token(4), Token(Operator::Minus), Token(5),
        Token(Operator::RParen)
    }, Lexer::Tokenize(L"1+2*3/(4-5)"));
}
Operator
, .
// Operators and parentheses known to the lexer. Each enumerator's value is
// the wide character that spells it.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')'
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators and parentheses known to the lexer. Each enumerator's value is
// the wide character that spells it, so a matching input character can be
// cast straight to Operator.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// One-character string holding the operator's symbol.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Discriminator for the two kinds of token.
enum class TokenType {
    Operator,
    Number
};

// Name of the token kind; throws std::out_of_range for an unknown value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// A lexer token: either an operator code or a numeric value. m_type records
// which member of the anonymous union is the one that was initialised.
class Token {
public:
    // Implicit on purpose: the tokenizer and the tests build tokens directly
    // from an Operator or a double.
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Returns the operator code; throws std::logic_error for a number token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the numeric value; throws std::logic_error for an operator token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same kind and the same payload.
    // Numbers are compared exactly.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token: std::to_wstring of the value for numbers, the
// operator's symbol for operators.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

// Scans a null-terminated wide string from left to right and collects the
// numbers and operators it recognises. Any other character is skipped.
class Tokenizer {
public:
    // Stores a pointer into `expr`'s buffer, so `expr` must stay alive while
    // Tokenize() runs. explicit, so a string never silently converts into a
    // scanner that points into a temporary.
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the input up to the terminating null character.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            } else if(IsOperator()) {
                ScanOperator();
            } else {
                MoveNext();
            }
        }
    }

    // Tokens collected so far, in input order.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    // A number is recognised by its first character being a digit.
    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Lets wcstod parse the number and continues from where it stopped.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    // The character code itself is the operator's enum value.
    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits `expr` into number and operator tokens, skipping every character
// that is neither.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type: it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

// Not implemented yet: always throws std::exception, whatever `expr` is.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
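({} → nil) from no code at all to code that employs nil. (nil → constant) from nil to a constant. (constant → constant+) from a simple constant to a more complex one. (constant → scalar) replacing a constant with a variable or an argument. (statement → statements) adding more unconditional statements (break, continue, return, etc.). (unconditional → if) splitting the execution path. (scalar → array) from a variable/argument to an array. (array → container) from an array to a more complex container. (statement → recursion) from a statement to recursion. (if → while) from a conditional to a loop. (expression → function) replacing an expression with a function or an algorithm. (variable → assignment) replacing the value of a variable.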
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
// Produces at most one token from the start of `expr`:
//  - empty input gives an empty list;
//  - a leading digit gives one number token parsed from the start of the text;
//  - any other leading character gives one operator token.
// Uses the standard wcstod rather than the Microsoft-only _wtof.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *current = expr.c_str();
    if(!*current)
        return{};
    if(iswdigit(*current))
        return{ wcstod(current, nullptr) };
    return{ static_cast<Operator>(*current) };
}
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
// Produces at most one token from the start of `expr` and collects it in
// `result`: a number when the text starts with a digit, otherwise an
// operator; empty input gives an empty list.
// Uses the standard wcstod rather than the Microsoft-only _wtof.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    const wchar_t *current = expr.c_str();
    if(!*current)
        return result;
    if(iswdigit(*current)) {
        result.push_back(wcstod(current, nullptr));
    } else {
        result.push_back(static_cast<Operator>(*current));
    }
    return result;
}
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators recognised by the lexer; each value is the wide character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Renders an operator as a one-character string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Discriminates the payload held by a Token.
enum class TokenType {
    Operator,
    Number
};

/// Name of a token type.
/// @throws std::out_of_range for a value outside the enumeration.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

/// A lexer token: either an operator or a floating-point number.
/// Constructors and conversion operators are implicit; callers pass a Token
/// where an Operator or double is expected and vice versa.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// @throws std::logic_error if this is not an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error if this is not a number token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same payload
    /// (numbers are compared exactly).
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is initialised.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Renders a token: numbers via std::to_wstring, operators via ToString(Operator).
/// @throws std::out_of_range for an unknown token type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

/// Single-pass scanner that turns an expression string into tokens.
/// It keeps a raw pointer into the string given to the constructor, so that
/// string must stay alive and unmodified while the tokenizer is in use.
class Tokenizer {
public:
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Scans up to the terminating null: numbers and operators become tokens,
    /// any other character is skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    /// Parses a number with wcstod and moves the cursor past it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits `expr` into number and operator tokens, skipping every other character.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data at this step.
struct Token {};

/// Sequence of tokens produced by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Splits an expression into tokens.
/// Not implemented at this step: the argument is ignored and the call always throws.
/// @throws std::exception always.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
/// Minimal implementation: ignores the input and returns an empty token list.
inline Tokens Tokenize(std::string expr) {
    Tokens none;
    return none;
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// Expects L"+" to yield a single Operator::Plus token.
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
/// Operators recognised by the lexer; the enumerator's value is the wide
/// character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
};

/// At this step a token is simply an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
/// Renders a token as a one-character wide string holding the token's character code.
inline std::wstring ToString(const Token &token) {
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
/// Returns an empty list for empty input; otherwise a single token built from
/// the first character (the rest of the string is ignored).
inline Tokens Tokenize(std::wstring expr) {
    return expr.empty() ? Tokens{} : Tokens{ static_cast<Operator>(expr[0]) };
}
. . . …
.
// Expects L"1" to yield a single number token equal to 1.0.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token constructed from a double must report TokenType::Number.
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
… . . . .
.
// An operator token must convert back to the Operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token must convert back to the double it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
// Body fragment: a leading character in '0'..'9' becomes a number token holding (character code - '0'); any other first character is cast to an Operator.
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
// Expects L"12.34" to yield a single number token equal to 12.34.
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
/// Produces at most one token from the start of `expr`: nothing for an empty
/// string, a number parsed by _wtof when the first character is a digit, and
/// otherwise the first character cast to an Operator.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *first = expr.c_str();
    if(*first == L'\0') {
        return Tokens();
    }
    if(iswdigit(*first)) {
        return Tokens{ Token(_wtof(first)) };
    }
    return Tokens{ Token(static_cast<Operator>(*first)) };
}
. .
// Expects L"+12.34" to yield two tokens: Operator::Plus followed by 12.34.
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
/// Collects at most one token from the start of `expr` into a result list:
/// a number parsed by _wtof when the first character is a digit, otherwise
/// the first character cast to an Operator. Empty input gives an empty list.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    const wchar_t *first = expr.c_str();
    if(*first != L'\0') {
        result.push_back(iswdigit(*first)
            ? Token(_wtof(first))
            : Token(static_cast<Operator>(*first)));
    }
    return result;
}
: (if → while). , .
/// Tokenizes an expression made of numbers and single-character operators.
/// A character accepted by iswdigit starts a number, which wcstod parses and
/// which moves the cursor to the first unparsed character; every other
/// character is converted to an Operator token as-is.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    for(const wchar_t *cursor = expr.c_str(); *cursor != L'\0'; ) {
        if(!iswdigit(*cursor)) {
            result.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        wchar_t *numberEnd = nullptr;
        result.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;
    }
    return result;
}
wcstod
, _wtof
, . , . , .
. .
.
// Expects L" 1 + 12.34 " to yield exactly three tokens: 1.0, Plus, 12.34 (no tokens for the spaces).
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
// Loop fragment: a digit starts a number parsed by wcstod, the Operator::Plus character yields an operator token, and any other character is skipped.
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
/// Tokenizes `expr` by running a Detail::Tokenizer over it and returning a
/// copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    const Tokens &collected = scanner.Result();
    return collected;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// Expects L"1+2*3/(4-5)" to yield eleven tokens covering all six operators and the numbers 1..5, in source order.
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
/// Operators recognised by the lexer. Each enumerator's value is the wide
/// character that spells the operator in an expression.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators recognised by the lexer; each value is the wide character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Renders an operator as a one-character string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Discriminates the payload held by a Token.
enum class TokenType {
    Operator,
    Number
};

/// Name of a token type.
/// @throws std::out_of_range for a value outside the enumeration.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

/// A lexer token: either an operator or a floating-point number.
/// Constructors and conversion operators are implicit; callers pass a Token
/// where an Operator or double is expected and vice versa.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// @throws std::logic_error if this is not an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error if this is not a number token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same payload
    /// (numbers are compared exactly).
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is initialised.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Renders a token: numbers via std::to_wstring, operators via ToString(Operator).
/// @throws std::out_of_range for an unknown token type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

/// Single-pass scanner that turns an expression string into tokens.
/// It keeps a raw pointer into the string given to the constructor, so that
/// string must stay alive and unmodified while the tokenizer is in use.
class Tokenizer {
public:
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Scans up to the terminating null: numbers and operators become tokens,
    /// any other character is skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    /// Parses a number with wcstod and moves the cursor past it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits `expr` into number and operator tokens, skipping every other character.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data at this step.
struct Token {};

/// Sequence of tokens produced by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Splits an expression into tokens.
/// Not implemented at this step: the argument is ignored and the call always throws.
/// @throws std::exception always.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
/// Minimal implementation: ignores the input and returns an empty token list.
inline Tokens Tokenize(std::string expr) {
    Tokens none;
    return none;
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// Expects L"+" to yield a single Operator::Plus token.
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
/// Operators recognised by the lexer; the enumerator's value is the wide
/// character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
};

/// At this step a token is simply an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
/// Renders a token as a one-character wide string holding the token's character code.
inline std::wstring ToString(const Token &token) {
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
/// Returns an empty list for empty input; otherwise a single token built from
/// the first character (the rest of the string is ignored).
inline Tokens Tokenize(std::wstring expr) {
    return expr.empty() ? Tokens{} : Tokens{ static_cast<Operator>(expr[0]) };
}
. . . …
.
// Expects L"1" to yield a single number token equal to 1.0.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token constructed from a double must report TokenType::Number.
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
… . . . .
.
// An operator token must convert back to the Operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token must convert back to the double it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
// Body fragment: a leading character in '0'..'9' becomes a number token holding (character code - '0'); any other first character is cast to an Operator.
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
// Expects L"12.34" to yield a single number token equal to 12.34.
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
/// Produces at most one token from the start of `expr`: nothing for an empty
/// string, a number parsed by _wtof when the first character is a digit, and
/// otherwise the first character cast to an Operator.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *first = expr.c_str();
    if(*first == L'\0') {
        return Tokens();
    }
    if(iswdigit(*first)) {
        return Tokens{ Token(_wtof(first)) };
    }
    return Tokens{ Token(static_cast<Operator>(*first)) };
}
. .
// Expects L"+12.34" to yield two tokens: Operator::Plus followed by 12.34.
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
/// Collects at most one token from the start of `expr` into a result list:
/// a number parsed by _wtof when the first character is a digit, otherwise
/// the first character cast to an Operator. Empty input gives an empty list.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    const wchar_t *first = expr.c_str();
    if(*first != L'\0') {
        result.push_back(iswdigit(*first)
            ? Token(_wtof(first))
            : Token(static_cast<Operator>(*first)));
    }
    return result;
}
: (if → while). , .
/// Tokenizes an expression made of numbers and single-character operators.
/// A character accepted by iswdigit starts a number, which wcstod parses and
/// which moves the cursor to the first unparsed character; every other
/// character is converted to an Operator token as-is.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    for(const wchar_t *cursor = expr.c_str(); *cursor != L'\0'; ) {
        if(!iswdigit(*cursor)) {
            result.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        wchar_t *numberEnd = nullptr;
        result.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;
    }
    return result;
}
wcstod
, _wtof
, . , . , .
. .
.
// Expects L" 1 + 12.34 " to yield exactly three tokens: 1.0, Plus, 12.34 (no tokens for the spaces).
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
// Loop fragment: a digit starts a number parsed by wcstod, the Operator::Plus character yields an operator token, and any other character is skipped.
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
/// Tokenizes `expr` by running a Detail::Tokenizer over it and returning a
/// copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    const Tokens &collected = scanner.Result();
    return collected;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// Expects L"1+2*3/(4-5)" to yield eleven tokens covering all six operators and the numbers 1..5, in source order.
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
/// Operators recognised by the lexer. Each enumerator's value is the wide
/// character that spells the operator in an expression.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators recognised by the lexer; each value is the wide character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Renders an operator as a one-character string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Discriminates the payload held by a Token.
enum class TokenType {
    Operator,
    Number
};

/// Name of a token type.
/// @throws std::out_of_range for a value outside the enumeration.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

/// A lexer token: either an operator or a floating-point number.
/// Constructors and conversion operators are implicit; callers pass a Token
/// where an Operator or double is expected and vice versa.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// @throws std::logic_error if this is not an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error if this is not a number token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same payload
    /// (numbers are compared exactly).
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is initialised.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Renders a token: numbers via std::to_wstring, operators via ToString(Operator).
/// @throws std::out_of_range for an unknown token type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

/// Single-pass scanner that turns an expression string into tokens.
/// It keeps a raw pointer into the string given to the constructor, so that
/// string must stay alive and unmodified while the tokenizer is in use.
class Tokenizer {
public:
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Scans up to the terminating null: numbers and operators become tokens,
    /// any other character is skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    /// Parses a number with wcstod and moves the cursor past it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits `expr` into number and operator tokens, skipping every other character.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data yet.
struct Token {};

// Sequence of tokens returned by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub tokenizer that exists only so the first test compiles and fails.
// expr: expression text (ignored by this stub).
// Throws std::exception unconditionally.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
// Simplest passing implementation: every expression yields an empty token list.
inline Tokens Tokenize(std::string expr)
{
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone "+" is expected to produce exactly one Plus operator token.
TEST_METHOD(Should_tokenize_single_plus_operator)
{
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// Operators known so far; an enumerator's underlying value is the wide
// character that spells it.
enum class Operator : wchar_t
{
    Plus = L'+',
};

// At this stage a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
// Converts a token to a one-character wide string holding its underlying
// wchar_t value.
inline std::wstring ToString(const Token &token)
{
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
// Returns no tokens for an empty expression; otherwise returns a single token
// made by casting the first character to Operator.
inline Tokens Tokenize(std::wstring expr)
{
    Tokens result;
    if(!expr.empty())
    {
        result.push_back(static_cast<Operator>(expr.front()));
    }
    return result;
}
. . . β¦
.
// A single digit is expected to produce one number token with that value.
TEST_METHOD(Should_tokenize_single_digit)
{
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token constructed from a double is expected to report TokenType::Number.
TEST_METHOD(Should_get_type_for_number_token)
{
    const Token number(1.2);
    Assert::AreEqual(TokenType::Number, number.Type());
}
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
// An operator token is expected to convert back to the Operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token)
{
    const Token plus(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plus);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token is expected to convert back to the double it was built from.
TEST_METHOD(Should_get_number_value_from_number_token)
{
    const Token number(1.23);
    Assert::AreEqual<double>(1.23, number);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
// A multi-digit number with a fractional part is expected to become one number token.
TEST_METHOD(Should_tokenize_floating_point_number)
{
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
// Produces at most one token from the start of the expression: nothing for an
// empty string, a number if it starts with a digit, otherwise the first
// character cast to Operator.
inline Tokens Tokenize(std::wstring expr)
{
    const wchar_t *cursor = expr.c_str();
    if(*cursor == L'\0')
    {
        return{};
    }

    const bool startsWithDigit = iswdigit(*cursor) != 0;
    if(startsWithDigit)
    {
        return{ _wtof(cursor) };
    }
    return{ static_cast<Operator>(*cursor) };
}
. .
// An operator immediately followed by a number is expected to yield two tokens, in order.
TEST_METHOD(Should_tokenize_plus_and_number)
{
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
// Same single-token behaviour as before, restructured to accumulate into a
// result vector.
inline Tokens Tokenize(std::wstring expr)
{
    Tokens collected;
    const wchar_t *cursor = expr.c_str();
    if(*cursor != L'\0')
    {
        if(iswdigit(*cursor) != 0)
            collected.push_back(_wtof(cursor));
        else
            collected.push_back(static_cast<Operator>(*cursor));
    }
    return collected;
}
: (if → while). , .
// Scans the whole expression. A run starting with a digit is parsed by wcstod
// into one number token; every other character becomes an operator token.
inline Tokens Tokenize(std::wstring expr)
{
    Tokens collected;
    for(const wchar_t *cursor = expr.c_str(); *cursor != L'\0'; )
    {
        if(iswdigit(*cursor) == 0)
        {
            collected.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }

        // wcstod reports where the number ended; resume scanning from there.
        wchar_t *afterNumber = nullptr;
        collected.push_back(wcstod(cursor, &afterNumber));
        cursor = afterNumber;
    }
    return collected;
}
wcstod
, _wtof
, . , . , .
. .
.
// Spaces around and between tokens are expected to produce no tokens of their own.
TEST_METHOD(Should_skip_spaces)
{
    const Tokens actual = Lexer::Tokenize(L" 1 + 12.34 ");
    AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, actual);
}
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
// Public entry point of the lexer: runs a Detail::Tokenizer over expr and
// returns a copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr)
{
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();

    const Tokens &produced = scanner.Result();
    return produced;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// An expression using all four arithmetic operators and both parentheses is
// expected to yield every number and operator as its own token, in source order.
TEST_METHOD(Should_tokenize_complex_experssion)
{
    const Tokens actual = Lexer::Tokenize(L"1+2*3/(4-5)");
    AssertRange::AreEqual(
        {
            Token(1), Token(Operator::Plus),
            Token(2), Token(Operator::Mul),
            Token(3), Token(Operator::Div),
            Token(Operator::LParen),
            Token(4), Token(Operator::Minus), Token(5),
            Token(Operator::RParen)
        },
        actual);
}
Operator
, .
// Arithmetic operator and parenthesis symbols. Each enumerator's underlying
// value is the wide character that spells it.
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Arithmetic operator and parenthesis symbols. Each enumerator's underlying
// value is the wide character that spells it, so a matching input character
// can be cast straight to its enumerator.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the one-character string that spells the operator.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Discriminator telling which kind of value a Token holds.
enum class TokenType {
    Operator,
    Number
};

// Returns the enumerator's name.
// Throws std::out_of_range if type is not a known enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// Tagged value that holds either an Operator or a number (double).
// m_type records which union member is active.
class Token {
public:
    // Implicit on purpose: operators and doubles are used directly where a
    // Token is expected (push_back, initializer lists).
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    // Kind of value stored in this token.
    TokenType Type() const { return m_type; }

    // Returns the stored operator.
    // Throws std::logic_error if the token holds a number.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number.
    // Throws std::logic_error if the token holds an operator.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they hold the same kind of value and the values
    // compare equal (numbers are compared exactly).
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token: the number via std::to_wstring, or the operator symbol.
// Throws std::out_of_range if the token's type is not a known enumerator.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Single-pass scanner that turns an expression string into tokens.
// Numbers and Operator symbols become tokens; every other character is skipped.
class Tokenizer {
public:
    // Starts scanning at the first character of expr. Only a pointer into
    // expr's buffer is stored, so expr must stay alive and unmodified while
    // the tokenizer is in use.
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the expression up to its terminating null character.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    // Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses the number starting at the cursor and moves the cursor past it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // Defensive: wcstod consumed nothing for a character that
            // iswdigit accepted. Skip it so the scan always makes progress.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    // True when the character under the cursor is one of the Operator symbols.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    // Emits the operator under the cursor and advances one character.
    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits expr into number and operator tokens, ignoring every other character.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data yet.
struct Token {};

// Sequence of tokens returned by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub tokenizer that exists only so the first test compiles and fails.
// expr: expression text (ignored by this stub).
// Throws std::exception unconditionally.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} β nil) , . (nil β constant) . (constant β constant+) ( , ). (constant β scalar) , . (statement β statements) (break, continue, return ). (unconditional β if) . (scalar β array) / . (array β container) . (statement β recursion) . (if β while) . (expression β function) . (variable β assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Arithmetic operator and parenthesis symbols. Each enumerator's underlying
// value is the wide character that spells it, so a matching input character
// can be cast straight to its enumerator.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the one-character string that spells the operator.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Discriminator telling which kind of value a Token holds.
enum class TokenType {
    Operator,
    Number
};

// Returns the enumerator's name.
// Throws std::out_of_range if type is not a known enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// Tagged value that holds either an Operator or a number (double).
// m_type records which union member is active.
class Token {
public:
    // Implicit on purpose: operators and doubles are used directly where a
    // Token is expected (push_back, initializer lists).
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    // Kind of value stored in this token.
    TokenType Type() const { return m_type; }

    // Returns the stored operator.
    // Throws std::logic_error if the token holds a number.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number.
    // Throws std::logic_error if the token holds an operator.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they hold the same kind of value and the values
    // compare equal (numbers are compared exactly).
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token: the number via std::to_wstring, or the operator symbol.
// Throws std::out_of_range if the token's type is not a known enumerator.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Single-pass scanner that turns an expression string into tokens.
// Numbers and Operator symbols become tokens; every other character is skipped.
class Tokenizer {
public:
    // Starts scanning at the first character of expr. Only a pointer into
    // expr's buffer is stored, so expr must stay alive and unmodified while
    // the tokenizer is in use.
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the expression up to its terminating null character.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    // Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses the number starting at the cursor and moves the cursor past it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // Defensive: wcstod consumed nothing for a character that
            // iswdigit accepted. Skip it so the scan always makes progress.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    // True when the character under the cursor is one of the Operator symbols.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    // Emits the operator under the cursor and advances one character.
    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits expr into number and operator tokens, ignoring every other character.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data yet.
struct Token {};

// Sequence of tokens returned by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub tokenizer that exists only so the first test compiles and fails.
// expr: expression text (ignored by this stub).
// Throws std::exception unconditionally.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} β nil) , . (nil β constant) . (constant β constant+) ( , ). (constant β scalar) , . (statement β statements) (break, continue, return ). (unconditional β if) . (scalar β array) / . (array β container) . (statement β recursion) . (if β while) . (expression β function) . (variable β assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
/// Operators and parentheses known to the lexer.
/// Each enumerator's value is the wide character that spells it.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')'
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators and parentheses known to the lexer.
/// Each enumerator's value is the wide character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character string that spells the operator.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Kinds of token the lexer produces.
enum class TokenType {
    Operator,
    Number
};

/// Returns the name of the token type.
/// @throws std::out_of_range if type is not a known enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

/// A lexer token: either an Operator or a double, tagged with its TokenType.
class Token {
public:
    // The constructors are intentionally implicit: the lexer and the tests
    // build tokens directly from Operator and double values.
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// @throws std::logic_error if the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error if the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same stored value.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is ever read.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Formats the token's stored value.
/// @throws std::out_of_range if the token type is not a known enumerator.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

/// Walks a wide-character expression and collects number and operator tokens.
/// The constructor keeps expr.c_str(), so expr must stay alive while the tokenizer is used.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the whole expression; characters that start neither a number
    /// nor an operator (for example spaces) are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // Defensive: wcstod leaves end at the start when it converts nothing.
            // Skip the character instead of looping forever on the same position.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    bool IsOperator() const {
        const wchar_t symbol = *m_current;
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [symbol](Operator o) { return symbol == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Converts expr into a list of number and operator tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

/// Stub used to get the first test compiling: it always throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
/// Minimal implementation: ignores expr and returns an empty token list.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone "+" must yield exactly one Plus token.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
/// Operators known so far; the enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+'
};

/// For now a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
/// Returns a one-character string holding the token's character value.
inline std::wstring ToString(const Token &token) {
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
/// Returns no tokens for an empty expression; otherwise a single token made
/// by casting the first character to Operator.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    if(!expr.empty()) {
        tokens.push_back(static_cast<Operator>(expr[0]));
    }
    return tokens;
}
. . . β¦
.
// A single digit must yield one number token with that value.
TEST_METHOD(Should_tokenize_single_digit) {
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double must report the Number type.
TEST_METHOD(Should_get_type_for_number_token) {
    const Token numberToken(1.2);
    Assert::AreEqual(TokenType::Number, numberToken.Type());
}
. (constant → scalar) .
/// Token that remembers only which kind of value it was built from.
class Token {
public:
    Token(Operator)
        : m_type(TokenType::Operator) {
    }

    Token(double)
        : m_type(TokenType::Number) {
    }

    /// Returns the kind chosen by the constructor that was used.
    TokenType Type() const {
        return m_type;
    }

private:
    TokenType m_type;
};
β¦ . . . .
.
// An operator token must convert back to the operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    const Token plusToken(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plusToken);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token must convert back to the value it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) {
    const Token numberToken(1.23);
    Assert::AreEqual<double>(1.23, numberToken);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
// A number with a fractional part must come back as a single number token.
TEST_METHOD(Should_tokenize_floating_point_number) {
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
/// Produces at most one token from the start of expr: nothing for an empty
/// string, a number (via _wtof) when it starts with a digit, otherwise the
/// first character cast to Operator.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *const start = expr.c_str();
    const wchar_t first = *start;
    if(first == L'\0') {
        return Tokens();
    }
    if(iswdigit(first)) {
        return{ _wtof(start) };
    }
    return{ static_cast<Operator>(first) };
}
. .
// An operator immediately followed by a number must give two tokens, in order.
TEST_METHOD(Should_tokenize_plus_and_number) {
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
/// Collects at most one token from the start of expr into a result list:
/// a number (via _wtof) when it starts with a digit, otherwise the first
/// character cast to Operator. An empty expression yields an empty list.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    const wchar_t *cursor = expr.c_str();
    if(*cursor != L'\0') {
        if(iswdigit(*cursor)) {
            tokens.push_back(_wtof(cursor));
        }
        else {
            tokens.push_back(static_cast<Operator>(*cursor));
        }
    }
    return tokens;
}
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators and parentheses known to the lexer.
/// Each enumerator's value is the wide character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character string that spells the operator.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Kinds of token the lexer produces.
enum class TokenType {
    Operator,
    Number
};

/// Returns the name of the token type.
/// @throws std::out_of_range if type is not a known enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

/// A lexer token: either an Operator or a double, tagged with its TokenType.
class Token {
public:
    // The constructors are intentionally implicit: the lexer and the tests
    // build tokens directly from Operator and double values.
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// @throws std::logic_error if the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error if the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same stored value.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is ever read.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Formats the token's stored value.
/// @throws std::out_of_range if the token type is not a known enumerator.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

/// Walks a wide-character expression and collects number and operator tokens.
/// The constructor keeps expr.c_str(), so expr must stay alive while the tokenizer is used.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the whole expression; characters that start neither a number
    /// nor an operator (for example spaces) are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // Defensive: wcstod leaves end at the start when it converts nothing.
            // Skip the character instead of looping forever on the same position.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    bool IsOperator() const {
        const wchar_t symbol = *m_current;
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [symbol](Operator o) { return symbol == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Converts expr into a list of number and operator tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

/// Stub used to get the first test compiling: it always throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} β nil) , . (nil β constant) . (constant β constant+) ( , ). (constant β scalar) , . (statement β statements) (break, continue, return ). (unconditional β if) . (scalar β array) / . (array β container) . (statement β recursion) . (if β while) . (expression β function) . (variable β assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators known to the lexer; each enumerator's value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')',
};

// Returns the one-character string that spells op.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

enum class TokenType { Operator, Number };

// Returns the enumerator's name; throws std::out_of_range for a value outside the enum.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator: return L"Operator";
        case TokenType::Number: return L"Number";
        default: throw std::out_of_range("TokenType");
    }
}

// A lexer token: either an Operator or a double, tagged with its TokenType.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Conversion to Operator; throws std::logic_error when the token holds a number.
    operator Operator() const {
        if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Conversion to double; throws std::logic_error when the token holds an operator.
    operator double() const {
        if(m_type != TokenType::Number) throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when both the type and the stored value match.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator: return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number: return left.m_number == right.m_number;
                default: throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Formats a token as text: numbers through std::to_wstring, operators as their character.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number: return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator: return ToString(static_cast<Operator>(token));
        default: throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Scans a wide-character expression from left to right, collecting number and
// operator tokens; any other character is stepped over.
// NOTE(review): only the pointer from expr.c_str() is stored, so expr presumably has to
// stay alive while the tokenizer is in use - confirm against callers.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    void Tokenize() {
        while(!EndOfExperssion()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExperssion() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses a number with wcstod and resumes scanning right after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Tokenizes expr and returns the collected tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
// InterpreterTests.cpp: CppUnitTest suites for Lexer::Tokenize (LexerTests) and Token (TokenTests), plus the AssertRange::AreEqual helper, which first asserts that an initializer_list and a range have the same size and then asserts equality element by element, naming the position in the failure message.
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data at this stage.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub: always throws std::exception; no tokenizing is implemented here.
inline Tokens Tokenize(std::string expr) { throw std::exception(); }

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
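({} → nil) no code at all becomes code that uses nil. (nil → constant) nil is replaced by a constant. (constant → constant+) a simple constant becomes a more complex one. (constant → scalar) a constant is replaced by a variable or an argument. (statement → statements) more unconditional statements are added (break, continue, return and so on). (unconditional → if) the execution path is split. (scalar → array) a variable is replaced by an array. (array → container) an array is replaced by a container. (statement → recursion) a statement is replaced by recursion. (if → while) a condition is replaced by a loop. (expression → function) an expression is replaced by a function or an algorithm. (variable → assignment) the value of a variable is replaced.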
.
// Returns an empty token list for any input; expr is not inspected.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// Expects a lone "+" to produce a single Plus operator token.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// The only operator so far; its value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+'
};

// For now a token is just an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
// Returns a one-character string holding the token's value cast to wchar_t.
inline std::wstring ToString(const Token &token) {
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
// Returns no tokens for an empty expression; otherwise a single token made
// from the first character cast to Operator.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    if(!expr.empty()) {
        tokens.push_back(static_cast<Operator>(expr[0]));
    }
    return tokens;
}
. . . β¦
.
// Expects the single digit "1" to produce one token equal to 1.0.
TEST_METHOD(Should_tokenize_single_digit) {
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// Expects a token built from a double to report TokenType::Number.
TEST_METHOD(Should_get_type_for_number_token) {
    const Token numberToken(1.2);
    Assert::AreEqual(TokenType::Number, numberToken.Type());
}
. (constant → scalar) .
// Token records which kind it is, chosen by the constructor overload used;
// the constructor argument itself is not stored.
class Token {
public:
    Token(Operator) : m_type(TokenType::Operator) {}
    Token(double) : m_type(TokenType::Number) {}

    TokenType Type() const {
        return m_type;
    }

private:
    TokenType m_type;
};
β¦ . . . .
.
// Expects an operator token to convert back to the Operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    const Token plusToken(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plusToken);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// Expects a number token to convert back to the double it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) {
    const Token numberToken(1.23);
    Assert::AreEqual<double>(1.23, numberToken);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
// Expects "12.34" to produce one token equal to 12.34.
TEST_METHOD(Should_tokenize_floating_point_number) {
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
// Produces at most one token: nothing for an empty expression, a number parsed by
// _wtof when the first character is a digit, otherwise that character cast to Operator.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *current = expr.c_str();
    if(*current == L'\0') {
        return{};
    }
    if(!iswdigit(*current)) {
        return{ static_cast<Operator>(*current) };
    }
    return{ _wtof(current) };
}
. .
// Expects "+12.34" to produce a Plus token followed by the number 12.34.
TEST_METHOD(Should_tokenize_plus_and_number) {
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
// Collects at most one token into a result list: a number parsed by _wtof when the
// first character is a digit, otherwise that character cast to Operator.
// An empty expression leaves the list empty.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    const wchar_t *current = expr.c_str();
    if(*current != L'\0') {
        if(iswdigit(*current)) {
            result.push_back(_wtof(current));
        }
        else {
            result.push_back(static_cast<Operator>(*current));
        }
    }
    return result;
}
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
Interpreter.h #pragma once; #include <vector> #include <wchar.h> #include <algorithm> namespace Interpreter { enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', }; inline std::wstring ToString(const Operator &op) { return{ static_cast<wchar_t>(op) }; } enum class TokenType { Operator, Number }; inline std::wstring ToString(const TokenType &type) { switch(type) { case TokenType::Operator: return L"Operator"; case TokenType::Number: return L"Number"; default: throw std::out_of_range("TokenType"); } } class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } friend inline bool operator==(const Token &left, const Token &right) { if(left.m_type == right.m_type) { switch(left.m_type) { case Interpreter::TokenType::Operator: return left.m_operator == right.m_operator; case Interpreter::TokenType::Number: return left.m_number == right.m_number; default: throw std::out_of_range("TokenType"); } } return false; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: throw std::out_of_range("TokenType"); } } typedef std::vector<Token> Tokens; namespace Lexer { namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { 
ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); } } // namespace Lexer } // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once; #include <vector> namespace Interpreter { struct Token {}; typedef std::vector<Token> Tokens; namespace Lexer { inline Tokens Tokenize(std::string expr) { throw std::exception(); } } // namespace Lexer } // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} β nil) , . (nil β constant) . (constant β constant+) ( , ). (constant β scalar) , . (statement β statements) (break, continue, return ). (unconditional β if) . (scalar β array) / . (array β container) . (statement β recursion) . (if β while) . (expression β function) . (variable β assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
Interpreter.h #pragma once; #include <vector> #include <wchar.h> #include <algorithm> namespace Interpreter { enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', }; inline std::wstring ToString(const Operator &op) { return{ static_cast<wchar_t>(op) }; } enum class TokenType { Operator, Number }; inline std::wstring ToString(const TokenType &type) { switch(type) { case TokenType::Operator: return L"Operator"; case TokenType::Number: return L"Number"; default: throw std::out_of_range("TokenType"); } } class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } friend inline bool operator==(const Token &left, const Token &right) { if(left.m_type == right.m_type) { switch(left.m_type) { case Interpreter::TokenType::Operator: return left.m_operator == right.m_operator; case Interpreter::TokenType::Number: return left.m_number == right.m_number; default: throw std::out_of_range("TokenType"); } } return false; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: throw std::out_of_range("TokenType"); } } typedef std::vector<Token> Tokens; namespace Lexer { namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { 
ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); } } // namespace Lexer } // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

/// Sequence of tokens produced by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Splits an expression into tokens.
///
/// Deliberately unimplemented: the body only throws.
///
/// @param expr Expression text (not inspected).
/// @throws std::exception Always.
inline Tokens Tokenize(std::string expr) { throw std::exception(); }

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
/// Minimal implementation: every expression yields an empty token list.
/// @param expr Expression text (not inspected).
inline Tokens Tokenize(std::string expr)
{
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
/// Operators known to the lexer. The enumerator's value is the character
/// that spells the operator.
enum class Operator : wchar_t
{
    Plus = L'+'
};

/// Token is an alias for Operator.
using Token = Operator;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
/// Tokenizes an expression that is either empty or starts with an operator.
/// Only the first character is inspected; it is cast to Operator unchecked.
/// @param expr Expression text.
/// @return Empty list for empty input, otherwise a single-element list.
inline Tokens Tokenize(std::wstring expr)
{
    Tokens tokens;
    if(!expr.empty())
    {
        tokens.push_back(static_cast<Operator>(expr[0]));
    }
    return tokens;
}
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
/// Tokenizes an expression made of numbers and single-character operators.
///
/// A run starting with a digit is parsed by wcstod, and scanning resumes at
/// the position wcstod reports. Any other character is cast to Operator
/// unchecked and consumed on its own.
/// @param expr Expression text.
/// @return Tokens in the order they appear in expr.
inline Tokens Tokenize(std::wstring expr)
{
    Tokens tokens;
    for(const wchar_t *cursor = expr.c_str(); *cursor != L'\0'; )
    {
        if(!iswdigit(*cursor))
        {
            tokens.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        wchar_t *numberEnd = nullptr;
        tokens.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
/// Operators and parentheses known to the lexer. Each enumerator's value is
/// the character that spells it.
enum class Operator : wchar_t
{
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')'
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators and parentheses the lexer recognises. Each enumerator's value is
/// the character that spells it, so an input character can be cast to it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character string that spells the operator.
inline std::wstring ToString(const Operator &op) {
    return std::wstring(1, static_cast<wchar_t>(op));
}

/// Discriminator for the value stored in a Token.
enum class TokenType {
    Operator,
    Number
};

/// Returns the name of a token type.
/// @throws std::out_of_range for a value that is not a named enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

/// Tagged value holding either an operator or a number.
///
/// Constructors and conversion operators are intentionally implicit so that
/// tokens can be built from, and compared against, plain operators and doubles.
class Token {
public:
    /// Creates an operator token.
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}

    /// Creates a number token.
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    /// Kind of value stored in this token.
    TokenType Type() const { return m_type; }

    /// Extracts the operator.
    /// @throws std::logic_error if this is not an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// Extracts the number.
    /// @throws std::logic_error if this is not a number token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Two tokens are equal when they have the same type and the same value.
    /// Numbers are compared exactly.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is initialised by the constructors.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Text form of a token: std::to_wstring for numbers, the operator character
/// for operators.
/// @throws std::out_of_range for a token whose type is not a named enumerator.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

/// Sequence of tokens produced by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

/// Scans a wide-character expression and accumulates its tokens.
/// The scanner keeps a raw pointer into the string given to the constructor,
/// so that string has to stay alive while Tokenize() runs.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Walks the input up to its terminating null. Numbers and operators
    /// become tokens; every other character is skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    /// Parses a number with wcstod and continues after the parsed text.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    /// True when the current character spells one of the known operators.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul,
                     Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(),
            [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    /// Stores the current character as an operator token and steps past it.
    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits an expression into number and operator tokens.
/// Characters that are neither digits nor known operators are skipped.
/// @param expr Expression text.
/// @return Tokens in the order they appear in expr.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

/// Sequence of tokens produced by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Splits an expression into tokens.
///
/// Deliberately unimplemented: the body only throws.
///
/// @param expr Expression text (not inspected).
/// @throws std::exception Always.
inline Tokens Tokenize(std::string expr) { throw std::exception(); }

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators and parentheses the lexer recognises. Each enumerator's value is
/// the character that spells it, so an input character can be cast to it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character string that spells the operator.
inline std::wstring ToString(const Operator &op) {
    return std::wstring(1, static_cast<wchar_t>(op));
}

/// Discriminator for the value stored in a Token.
enum class TokenType {
    Operator,
    Number
};

/// Returns the name of a token type.
/// @throws std::out_of_range for a value that is not a named enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

/// Tagged value holding either an operator or a number.
///
/// Constructors and conversion operators are intentionally implicit so that
/// tokens can be built from, and compared against, plain operators and doubles.
class Token {
public:
    /// Creates an operator token.
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}

    /// Creates a number token.
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    /// Kind of value stored in this token.
    TokenType Type() const { return m_type; }

    /// Extracts the operator.
    /// @throws std::logic_error if this is not an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// Extracts the number.
    /// @throws std::logic_error if this is not a number token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Two tokens are equal when they have the same type and the same value.
    /// Numbers are compared exactly.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is initialised by the constructors.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Text form of a token: std::to_wstring for numbers, the operator character
/// for operators.
/// @throws std::out_of_range for a token whose type is not a named enumerator.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

/// Sequence of tokens produced by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

/// Scans a wide-character expression and accumulates its tokens.
/// The scanner keeps a raw pointer into the string given to the constructor,
/// so that string has to stay alive while Tokenize() runs.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Walks the input up to its terminating null. Numbers and operators
    /// become tokens; every other character is skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    /// Parses a number with wcstod and continues after the parsed text.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    /// True when the current character spells one of the known operators.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul,
                     Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(),
            [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    /// Stores the current character as an operator token and steps past it.
    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits an expression into number and operator tokens.
/// Characters that are neither digits nor known operators are skipped.
/// @param expr Expression text.
/// @return Tokens in the order they appear in expr.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

/// Sequence of tokens produced by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Splits an expression into tokens.
///
/// Deliberately unimplemented: the body only throws.
///
/// @param expr Expression text (not inspected).
/// @throws std::exception Always.
inline Tokens Tokenize(std::string expr) { throw std::exception(); }

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
// Operators understood by the lexer. Each enumerator's value is the wide
// character that spells the operator, so a character can be cast directly.
enum class Operator : wchar_t
{
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')'
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators understood by the lexer; each value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the operator as a one-character string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Tells which kind of value a Token holds.
enum class TokenType {
    Operator,
    Number
};

// Returns the enumerator's name; throws std::out_of_range for any other value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// Holds either an operator or a number; m_type says which union member is valid.
class Token {
public:
    // Constructors are implicit on purpose: the tokenizer pushes raw
    // Operator / double values and relies on these conversions.
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Throws std::logic_error when the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error when the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Renders the token's payload: the number via std::to_wstring, or the operator character.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

// Walks a null-terminated wide string and collects tokens: a digit starts a
// number, an Operator character yields an operator, anything else is skipped.
class Tokenizer {
public:
    // Stores a pointer into expr's buffer, so expr must stay alive while scanning.
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses the number at the current position and moves just past it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits an expression into number and operator tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Empty placeholder type, just enough for the lexer interface to compile.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub: no tokenizing logic yet, it throws std::exception unconditionally.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
// Always returns an empty token list; the argument is not inspected.
inline Tokens Tokenize(std::string expr)
{
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone "+" must produce exactly one Plus operator token.
TEST_METHOD(Should_tokenize_single_plus_operator)
{
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// Operators known to the lexer; the value is the operator's character.
enum class Operator : wchar_t
{
    Plus = L'+'
};

// At this stage a token is simply an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
// Renders the token as a one-character string holding its character code.
inline std::wstring ToString(const Token &token)
{
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
// Empty input gives no tokens; otherwise the first character becomes a single operator token.
inline Tokens Tokenize(std::wstring expr)
{
    Tokens result;
    if(!expr.empty())
    {
        result.push_back(static_cast<Operator>(expr[0]));
    }
    return result;
}
. . . …
.
// A single digit must come back as one number token.
TEST_METHOD(Should_tokenize_single_digit)
{
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double must report the Number type.
TEST_METHOD(Should_get_type_for_number_token)
{
    const Token numToken(1.2);
    const TokenType actual = numToken.Type();
    Assert::AreEqual(TokenType::Number, actual);
}
. (constant → scalar) .
// Token that only remembers its kind; the constructor argument's value is not stored yet.
class Token
{
    TokenType m_type;

public:
    Token(Operator) : m_type(TokenType::Operator) {}
    Token(double) : m_type(TokenType::Number) {}

    TokenType Type() const
    {
        return m_type;
    }
};
… . . . .
.
// An operator token must convert back to the Operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token)
{
    const Token token(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, token);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token must convert back to the double it was built from.
TEST_METHOD(Should_get_number_value_from_number_token)
{
    const Token token(1.23);
    Assert::AreEqual<double>(1.23, token);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
// A number with a fractional part must come back as a single number token.
TEST_METHOD(Should_tokenize_floating_point_number)
{
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
// Produces at most one token from the start of the expression:
// a number when it begins with a digit, otherwise an operator.
inline Tokens Tokenize(std::wstring expr)
{
    const wchar_t *first = expr.c_str();
    if(*first == L'\0')
    {
        return Tokens();
    }
    if(iswdigit(*first))
    {
        return Tokens{ Token(_wtof(first)) };
    }
    return Tokens{ Token(static_cast<Operator>(*first)) };
}
. .
// An operator immediately followed by a number must give two tokens in that order.
TEST_METHOD(Should_tokenize_plus_and_number)
{
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
// Same single-token behavior as before, now collecting into a result vector
// so that more tokens can be appended later.
inline Tokens Tokenize(std::wstring expr)
{
    Tokens tokens;
    const wchar_t *cursor = expr.c_str();
    if(*cursor != L'\0')
    {
        if(iswdigit(*cursor))
        {
            tokens.push_back(_wtof(cursor));
        }
        else
        {
            tokens.push_back(static_cast<Operator>(*cursor));
        }
    }
    return tokens;
}
: (if → while). , .
// Splits the expression into tokens. A position holding a digit starts a number,
// which wcstod parses as a whole; any other character becomes an operator token.
inline Tokens Tokenize(std::wstring expr)
{
    Tokens tokens;
    const wchar_t *cursor = expr.c_str();
    while(*cursor != L'\0')
    {
        if(!iswdigit(*cursor))
        {
            // Not the start of a number: store the character code as an operator.
            tokens.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        wchar_t *numberEnd = nullptr;
        tokens.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd; // continue right after the parsed number
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
// Spaces before, between and after tokens must not show up in the result.
TEST_METHOD(Should_skip_spaces)
{
    const Tokens actual = Lexer::Tokenize(L" 1 + 12.34 ");
    AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, actual);
}
(unconditional → if) , .
// Scan loop: digits start a number, '+' yields an operator token,
// and every other character (a space, for example) is stepped over.
while(*current)
{
    const wchar_t symbol = *current;
    if(iswdigit(symbol))
    {
        wchar_t *end = nullptr;
        result.push_back(wcstod(current, &end));
        current = end;
        continue;
    }
    if(symbol == static_cast<wchar_t>(Operator::Plus))
    {
        result.push_back(static_cast<Operator>(symbol));
    }
    ++current;
}
. . Detail
. Tokenize
.
// Lexer entry point: runs a Detail::Tokenizer over the expression and
// returns a copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr)
{
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    Tokens collected = scanner.Result();
    return collected;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// An expression using every operator and both parentheses, without spaces.
TEST_METHOD(Should_tokenize_complex_experssion)
{
    const Tokens actual = Lexer::Tokenize(L"1+2*3/(4-5)");
    AssertRange::AreEqual(
        {
            Token(1), Token(Operator::Plus),
            Token(2), Token(Operator::Mul),
            Token(3), Token(Operator::Div),
            Token(Operator::LParen),
            Token(4), Token(Operator::Minus), Token(5),
            Token(Operator::RParen)
        },
        actual);
}
Operator
, .
// Operators understood by the lexer. Each enumerator's value is the wide
// character that spells the operator, so a character can be cast directly.
enum class Operator : wchar_t
{
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')'
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators understood by the lexer; each value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the operator as a one-character string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Tells which kind of value a Token holds.
enum class TokenType {
    Operator,
    Number
};

// Returns the enumerator's name; throws std::out_of_range for any other value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// Holds either an operator or a number; m_type says which union member is valid.
class Token {
public:
    // Constructors are implicit on purpose: the tokenizer pushes raw
    // Operator / double values and relies on these conversions.
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Throws std::logic_error when the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error when the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Renders the token's payload: the number via std::to_wstring, or the operator character.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

// Walks a null-terminated wide string and collects tokens: a digit starts a
// number, an Operator character yields an operator, anything else is skipped.
class Tokenizer {
public:
    // Stores a pointer into expr's buffer, so expr must stay alive while scanning.
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses the number at the current position and moves just past it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits an expression into number and operator tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Empty placeholder type, just enough for the lexer interface to compile.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub: no tokenizing logic yet, it throws std::exception unconditionally.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . …
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
… . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators and parentheses of the expression language.
/// Each enumerator's value is the wide character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns a one-character string holding the operator's character.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Discriminator for the payload stored in a Token.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator name.
/// @throws std::out_of_range for a value that is not a TokenType enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

/// A lexer token: either an operator or a number.
///
/// Both constructors and both conversion operators are implicit on purpose:
/// the tokenizer pushes raw doubles/operators into a Tokens vector and the
/// tests compare tokens against plain values.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// @throws std::logic_error when the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error when the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same payload.
    /// Numbers are compared with exact floating-point equality.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Formats a token: numbers via std::to_wstring, operators via ToString(Operator).
/// @throws std::out_of_range for a token whose type is not a TokenType enumerator.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

/// Character-by-character scanner that collects tokens from a wide string.
///
/// The scanner keeps a raw pointer into the string given to the constructor,
/// so that string has to stay alive while Tokenize() runs.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Walks to the terminating null, scanning numbers and operators and
    /// stepping over every other character.
    void Tokenize() {
        while(!EndOfExperssion()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExperssion() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // IsNumber() guarantees a leading digit, so wcstod always consumes at
    // least one character and the scan makes progress.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits `expr` into number and operator tokens; characters that are
/// neither (such as spaces) are skipped.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Stub for the first failing test: it ignores `expr` and always throws.
/// @throws std::exception unconditionally.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
/// Returns an empty token list for any input.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone "+" yields exactly one Plus operator token.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
/// The only operator so far; its value is the character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
};

/// For now a token is just an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
/// Returns a one-character string holding the token's character code.
inline std::wstring ToString(const Token &token) {
    return std::wstring(1, static_cast<wchar_t>(token));
}
, , . , (unconditional → if).
/// Returns no tokens for an empty expression; otherwise a single operator
/// token built from the first character.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    if(!expr.empty()) {
        tokens.push_back(static_cast<Operator>(expr[0]));
    }
    return tokens;
}
. . . …
.
// A single digit yields exactly one number token.
TEST_METHOD(Should_tokenize_single_digit) {
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double reports the Number type.
TEST_METHOD(Should_get_type_for_number_token) {
    const Token number(1.2);
    Assert::AreEqual(TokenType::Number, number.Type());
}
. (constant → scalar) .
/// Token that remembers only which kind of value it was built from; the
/// value itself is not stored yet.
class Token {
    TokenType m_type;

public:
    Token(Operator) : m_type(TokenType::Operator) {}
    Token(double) : m_type(TokenType::Number) {}

    TokenType Type() const {
        return m_type;
    }
};
… . . . .
.
// An operator token converts back to the operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    const Token plus(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plus);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token converts back to the double it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) {
    const Token number(1.23);
    Assert::AreEqual<double>(1.23, number);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
// A multi-digit number with a fractional part is a single number token.
TEST_METHOD(Should_tokenize_floating_point_number) {
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
/// Produces at most one token from the start of `expr`: nothing for an
/// empty string, a number (parsed by _wtof) when the first character is a
/// digit, otherwise an operator built from the first character.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *first = expr.c_str();
    if(*first == L'\0') {
        return Tokens();
    }
    if(!iswdigit(*first)) {
        return{ static_cast<Operator>(*first) };
    }
    return{ _wtof(first) };
}
. .
// An operator immediately followed by a number yields two tokens.
TEST_METHOD(Should_tokenize_plus_and_number) {
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
/// Same single-token behaviour as before, but the token is now appended to
/// a result vector instead of being returned directly.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    const wchar_t *first = expr.c_str();
    if(*first != L'\0') {
        if(iswdigit(*first)) {
            tokens.push_back(_wtof(first));
        }
        else {
            tokens.push_back(static_cast<Operator>(*first));
        }
    }
    return tokens;
}
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
Interpreter.h #pragma once; #include <vector> #include <wchar.h> #include <algorithm> namespace Interpreter { enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', }; inline std::wstring ToString(const Operator &op) { return{ static_cast<wchar_t>(op) }; } enum class TokenType { Operator, Number }; inline std::wstring ToString(const TokenType &type) { switch(type) { case TokenType::Operator: return L"Operator"; case TokenType::Number: return L"Number"; default: throw std::out_of_range("TokenType"); } } class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } friend inline bool operator==(const Token &left, const Token &right) { if(left.m_type == right.m_type) { switch(left.m_type) { case Interpreter::TokenType::Operator: return left.m_operator == right.m_operator; case Interpreter::TokenType::Number: return left.m_number == right.m_number; default: throw std::out_of_range("TokenType"); } } return false; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: throw std::out_of_range("TokenType"); } } typedef std::vector<Token> Tokens; namespace Lexer { namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { 
ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); } } // namespace Lexer } // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once; #include <vector> namespace Interpreter { struct Token {}; typedef std::vector<Token> Tokens; namespace Lexer { inline Tokens Tokenize(std::string expr) { throw std::exception(); } } // namespace Lexer } // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . …
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
… . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
Interpreter.h #pragma once; #include <vector> #include <wchar.h> #include <algorithm> namespace Interpreter { enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', }; inline std::wstring ToString(const Operator &op) { return{ static_cast<wchar_t>(op) }; } enum class TokenType { Operator, Number }; inline std::wstring ToString(const TokenType &type) { switch(type) { case TokenType::Operator: return L"Operator"; case TokenType::Number: return L"Number"; default: throw std::out_of_range("TokenType"); } } class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } friend inline bool operator==(const Token &left, const Token &right) { if(left.m_type == right.m_type) { switch(left.m_type) { case Interpreter::TokenType::Operator: return left.m_operator == right.m_operator; case Interpreter::TokenType::Number: return left.m_number == right.m_number; default: throw std::out_of_range("TokenType"); } } return false; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: throw std::out_of_range("TokenType"); } } typedef std::vector<Token> Tokens; namespace Lexer { namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { 
ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); } } // namespace Lexer } // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

/// Sequence of tokens returned by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Splits an expression into tokens.
/// Stub: the argument is ignored and std::exception is thrown unconditionally.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
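({} → nil) no code at all → code that employs nil. (nil → constant). (constant → constant+) a simple constant to a more complex constant. (constant → scalar) replacing a constant with a variable or an argument. (statement → statements) adding more unconditional statements (break, continue, return, etc.). (unconditional → if) splitting the execution path. (scalar → array). (array → container). (statement → recursion). (if → while). (expression → function) replacing an expression with a function or algorithm. (variable → assignment) replacing the value of a variable.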
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
/// Operators known to the lexer. The enumerator's value is the operator's own
/// character.
enum class Operator : wchar_t {
    Plus = L'+',
};

/// At this stage a token is simply an operator.
using Token = Operator;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
/// Tokenizes `expr` by running a Detail::Tokenizer over it and returning a
/// copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    const Tokens &collected = scanner.Result();
    return collected;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
/// Operators and parentheses known to the lexer. Each enumerator's value is
/// the character that denotes it in the expression text.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
// True when the current character is one of the six Operator characters.
bool IsOperator() const {
    switch(*m_current) {
        case static_cast<wchar_t>(Operator::Plus):
        case static_cast<wchar_t>(Operator::Minus):
        case static_cast<wchar_t>(Operator::Mul):
        case static_cast<wchar_t>(Operator::Div):
        case static_cast<wchar_t>(Operator::LParen):
        case static_cast<wchar_t>(Operator::RParen):
            return true;
        default:
            return false;
    }
}
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators and parentheses known to the lexer. Each enumerator's value is
/// the character that denotes it in the expression text.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Renders an operator as a one-character string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Kind of value a Token holds.
enum class TokenType {
    Operator,
    Number
};

/// Name of a token type. Throws std::out_of_range for an unknown value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

/// A lexer token holding either an operator or a number. `m_type` records
/// which member of the anonymous union was initialised by the constructor.
class Token {
public:
    // Implicit on purpose: the lexer pushes raw doubles and Operators as tokens.
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// Returns the stored operator.
    /// Throws std::logic_error when the token is not an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// Returns the stored number.
    /// Throws std::logic_error when the token is not a number token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when both the type and the stored value match.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Text representation of a token. Throws std::out_of_range for an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

/// Walks a wide-character expression from left to right and collects tokens.
/// The expression is not copied: only a pointer into its buffer is kept, so
/// the string passed to the constructor must outlive the tokenizer.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the whole expression. Characters that start neither a number
    /// nor an operator are stepped over.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses a number with wcstod and resumes right after the parsed text.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits `expr` into number and operator tokens and returns them in order.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

/// Sequence of tokens returned by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Splits an expression into tokens.
/// Stub: the argument is ignored and std::exception is thrown unconditionally.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
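({} → nil) no code at all → code that employs nil. (nil → constant). (constant → constant+) a simple constant to a more complex constant. (constant → scalar) replacing a constant with a variable or an argument. (statement → statements) adding more unconditional statements (break, continue, return, etc.). (unconditional → if) splitting the execution path. (scalar → array). (array → container). (statement → recursion). (if → while). (expression → function) replacing an expression with a function or algorithm. (variable → assignment) replacing the value of a variable.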
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
/// Operators known to the lexer. The enumerator's value is the operator's own
/// character.
enum class Operator : wchar_t {
    Plus = L'+',
};

/// At this stage a token is simply an operator.
using Token = Operator;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
/// Tokenizes `expr` by running a Detail::Tokenizer over it and returning a
/// copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    const Tokens &collected = scanner.Result();
    return collected;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
/// Operators and parentheses known to the lexer. Each enumerator's value is
/// the character that denotes it in the expression text.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
// True when the current character is one of the six Operator characters.
bool IsOperator() const {
    switch(*m_current) {
        case static_cast<wchar_t>(Operator::Plus):
        case static_cast<wchar_t>(Operator::Minus):
        case static_cast<wchar_t>(Operator::Mul):
        case static_cast<wchar_t>(Operator::Div):
        case static_cast<wchar_t>(Operator::LParen):
        case static_cast<wchar_t>(Operator::RParen):
            return true;
        default:
            return false;
    }
}
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators and parentheses known to the lexer. Each enumerator's value is
/// the character that denotes it in the expression text.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Renders an operator as a one-character string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Kind of value a Token holds.
enum class TokenType {
    Operator,
    Number
};

/// Name of a token type. Throws std::out_of_range for an unknown value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

/// A lexer token holding either an operator or a number. `m_type` records
/// which member of the anonymous union was initialised by the constructor.
class Token {
public:
    // Implicit on purpose: the lexer pushes raw doubles and Operators as tokens.
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// Returns the stored operator.
    /// Throws std::logic_error when the token is not an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// Returns the stored number.
    /// Throws std::logic_error when the token is not a number token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when both the type and the stored value match.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Text representation of a token. Throws std::out_of_range for an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

/// Walks a wide-character expression from left to right and collects tokens.
/// The expression is not copied: only a pointer into its buffer is kept, so
/// the string passed to the constructor must outlive the tokenizer.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the whole expression. Characters that start neither a number
    /// nor an operator are stepped over.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses a number with wcstod and resumes right after the parsed text.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits `expr` into number and operator tokens and returns them in order.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

/// Sequence of tokens returned by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Splits an expression into tokens.
/// Stub: the argument is ignored and std::exception is thrown unconditionally.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
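({} → nil) no code at all → code that employs nil. (nil → constant). (constant → constant+) a simple constant to a more complex constant. (constant → scalar) replacing a constant with a variable or an argument. (statement → statements) adding more unconditional statements (break, continue, return, etc.). (unconditional → if) splitting the execution path. (scalar → array). (array → container). (statement → recursion). (if → while). (expression → function) replacing an expression with a function or algorithm. (variable → assignment) replacing the value of a variable.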
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
/// Operators known to the lexer. The enumerator's value is the operator's own
/// character.
enum class Operator : wchar_t {
    Plus = L'+',
};

/// At this stage a token is simply an operator.
using Token = Operator;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
/// Tokenizes `expr` by running a Detail::Tokenizer over it and returning a
/// copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    const Tokens &collected = scanner.Result();
    return collected;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators and parentheses recognized by the lexer.
/// Each enumerator's value is the wide character that spells it, so a
/// character can be cast directly to its Operator and back.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character string that spells @p op.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Discriminator for the value stored in a Token.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator name of @p type.
/// @throws std::out_of_range for a value outside the enumeration.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

/// A lexer token: either an Operator or a floating-point number.
/// m_type records which member of the anonymous union is active; the
/// conversion operators check it before reading the union.
class Token {
public:
    // The constructors are intentionally implicit: the tokenizer pushes raw
    // Operator / double values straight into a Tokens vector.
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    /// Returns which kind of value this token holds.
    TokenType Type() const { return m_type; }

    /// @throws std::logic_error if this is not an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error if this is not a number token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same payload.
    /// Numbers are compared with exact floating-point equality.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Formats a token by delegating to the formatter of its payload.
/// @throws std::out_of_range for a token whose type is outside TokenType.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

/// Single-pass scanner that turns an expression string into Tokens.
/// It keeps a raw pointer into the string passed to the constructor, so that
/// string must outlive the Tokenizer.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Scans to the terminating L'\0'. Characters that neither start a
    /// number nor match an Operator (spaces, for instance) are skipped.
    void Tokenize() {
        while(!EndOfExperssion()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExperssion() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // wcstod reports where the number ended through `end`; IsNumber() has
    // already seen a digit, so at least one character is consumed.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits @p expr into number and operator tokens.
/// @return the tokens in input order; empty for an empty expression.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Not implemented yet: always throws std::exception so that the first
/// test fails instead of passing by accident.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
/// Returns no tokens for an empty expression; otherwise a single token
/// made from the first character interpreted as an Operator.
inline Tokens Tokenize(std::wstring expr)
{
    Tokens tokens;
    if(!expr.empty())
    {
        tokens.push_back(static_cast<Operator>(expr[0]));
    }
    return tokens;
}
. . . …
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
… . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
/// Walks the expression up to its terminating L'\0'. A run that starts with
/// a digit becomes one number token, parsed by wcstod; every other
/// character becomes an operator token of its own.
inline Tokens Tokenize(std::wstring expr)
{
    Tokens tokens;
    for(const wchar_t *cursor = expr.c_str(); *cursor != L'\0'; )
    {
        if(!iswdigit(*cursor))
        {
            tokens.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }

        // wcstod reports through numberEnd where the number stopped.
        wchar_t *numberEnd = nullptr;
        const double value = wcstod(cursor, &numberEnd);
        tokens.push_back(value);
        cursor = numberEnd;
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators and parentheses recognized by the lexer.
/// Each enumerator's value is the wide character that spells it, so a
/// character can be cast directly to its Operator and back.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character string that spells @p op.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Discriminator for the value stored in a Token.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator name of @p type.
/// @throws std::out_of_range for a value outside the enumeration.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

/// A lexer token: either an Operator or a floating-point number.
/// m_type records which member of the anonymous union is active; the
/// conversion operators check it before reading the union.
class Token {
public:
    // The constructors are intentionally implicit: the tokenizer pushes raw
    // Operator / double values straight into a Tokens vector.
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    /// Returns which kind of value this token holds.
    TokenType Type() const { return m_type; }

    /// @throws std::logic_error if this is not an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error if this is not a number token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same payload.
    /// Numbers are compared with exact floating-point equality.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Formats a token by delegating to the formatter of its payload.
/// @throws std::out_of_range for a token whose type is outside TokenType.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

/// Single-pass scanner that turns an expression string into Tokens.
/// It keeps a raw pointer into the string passed to the constructor, so that
/// string must outlive the Tokenizer.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Scans to the terminating L'\0'. Characters that neither start a
    /// number nor match an Operator (spaces, for instance) are skipped.
    void Tokenize() {
        while(!EndOfExperssion()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExperssion() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // wcstod reports where the number ended through `end`; IsNumber() has
    // already seen a digit, so at least one character is consumed.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits @p expr into number and operator tokens.
/// @return the tokens in input order; empty for an empty expression.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp
#include "stdafx.h"
#include "CppUnitTest.h"
#include "Interpreter.h"

// Unit tests for the lexer and the Token class declared in Interpreter.h.
namespace InterpreterTests {

using namespace Microsoft::VisualStudio::CppUnitTestFramework;
using namespace Interpreter;
using namespace std;

// Range-comparison helper used by the lexer tests.
namespace AssertRange {
    // Asserts that `actual` holds as many elements as `expect` and that the
    // elements are equal pairwise, in order. Elements are compared through
    // Assert::AreEqual<T>, where T is the element type of `expect`; the
    // failure message carries the zero-based position of the mismatch.
    template<class T, class ActualRange>
    static void AreEqual(initializer_list<T> expect, const ActualRange &actual) {
        auto actualIter = begin(actual);
        auto expectIter = begin(expect);
        // Check the sizes first.
        Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs.");
        for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) {
            auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter));
            Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str());
        }
    }
} // namespace AssertRange

// Behavior of Lexer::Tokenize, one aspect per test.
TEST_CLASS(LexerTests) {
public:
    TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) {
        Tokens tokens = Lexer::Tokenize(L"");
        Assert::IsTrue(tokens.empty());
    }
    TEST_METHOD(Should_tokenize_single_plus_operator) {
        Tokens tokens = Lexer::Tokenize(L"+");
        AssertRange::AreEqual({ Operator::Plus }, tokens);
    }
    TEST_METHOD(Should_tokenize_single_digit) {
        Tokens tokens = Lexer::Tokenize(L"1");
        AssertRange::AreEqual({ 1.0 }, tokens);
    }
    TEST_METHOD(Should_tokenize_floating_point_number) {
        Tokens tokens = Lexer::Tokenize(L"12.34");
        AssertRange::AreEqual({ 12.34 }, tokens);
    }
    TEST_METHOD(Should_tokenize_plus_and_number) {
        Tokens tokens = Lexer::Tokenize(L"+12.34");
        AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens);
    }
    // Leading, trailing and inner spaces must not produce tokens.
    TEST_METHOD(Should_skip_spaces) {
        Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 ");
        AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens);
    }
    // Covers every operator and both parentheses in one expression.
    TEST_METHOD(Should_tokenize_complex_experssion) {
        Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)");
        AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens);
    }
};

// Behavior of the Token class: type tag and value extraction.
TEST_CLASS(TokenTests) {
public:
    TEST_METHOD(Should_get_type_for_operator_token) {
        Token opToken(Operator::Plus);
        Assert::AreEqual(TokenType::Operator, opToken.Type());
    }
    TEST_METHOD(Should_get_type_for_number_token) {
        Token numToken(1.2);
        Assert::AreEqual(TokenType::Number, numToken.Type());
    }
    // The explicit <Operator> argument makes the Token argument go through
    // its conversion to Operator.
    TEST_METHOD(Should_get_operator_code_from_operator_token) {
        Token token(Operator::Plus);
        Assert::AreEqual<Operator>(Operator::Plus, token);
    }
    // Same idea for the conversion to double.
    TEST_METHOD(Should_get_number_value_from_number_token) {
        Token token(1.23);
        Assert::AreEqual<double>(1.23, token);
    }
};
}
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Not implemented yet: always throws std::exception so that the first
/// test fails instead of passing by accident.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . …
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
… . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators known to the lexer. Each enumerator's value is the character that
// spells it, so a character can be cast directly to an Operator and back.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the one-character string that spells `op`.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Tells which kind of value a Token holds.
enum class TokenType {
    Operator,
    Number
};

// Returns the name of `type`.
// Throws std::out_of_range for a value that is not one of the enumerators.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// Holds either an Operator or a number, tagged by TokenType.
// The constructors and conversion operators are deliberately implicit: the
// lexer pushes raw Operator/double values and the tests compare against them.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const {
        return m_type;
    }

    // Throws std::logic_error when the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Throws std::logic_error when the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Renders a token as text: std::to_wstring for numbers, the operator character otherwise.
// Throws std::out_of_range for an unknown token type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

// Scanner that walks a null-terminated wide string and collects tokens.
// It stores only a pointer into the string given to the constructor, so that
// string has to outlive the scan.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the whole expression: numbers and operators become tokens,
    // every other character (spaces, for example) is stepped over.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const {
        return m_result;
    }

private:
    bool EndOfExpression() const {
        return *m_current == L'\0';
    }

    // A number starts at any digit.
    bool IsNumber() const {
        return iswdigit(*m_current) != 0;
    }

    // Parses a number with wcstod and jumps to the first character after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    // True when the current character spells one of the known operators.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() {
        ++m_current;
    }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits `expr` into number and operator tokens and returns them in order.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data yet.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub that only lets the first test compile: it always throws std::exception,
// so any test calling it fails until a real implementation replaces it.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
// Simplest implementation that passes the first test:
// every expression yields an empty token list.
inline Tokens Tokenize(std::string expr)
{
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone "+" must produce exactly one Plus operator token.
TEST_METHOD(Should_tokenize_single_plus_operator)
{
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// The only operator known so far; its value is the character that spells it.
enum class Operator : wchar_t
{
    Plus = L'+',
};

// For now a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
// Renders a token as the one-character string holding its character code.
inline std::wstring ToString(const Token &token)
{
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
// An empty expression yields no tokens; otherwise the first character is
// returned as the single operator token.
inline Tokens Tokenize(std::wstring expr)
{
    Tokens tokens;
    if(!expr.empty())
    {
        tokens.push_back(static_cast<Operator>(expr[0]));
    }
    return tokens;
}
. . . β¦
.
// A single digit must produce one number token with that value.
TEST_METHOD(Should_tokenize_single_digit)
{
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double must report the Number type.
TEST_METHOD(Should_get_type_for_number_token)
{
    Token numberToken(1.2);
    Assert::AreEqual(TokenType::Number, numberToken.Type());
}
. (constant → scalar) .
// Token that remembers only which kind of value it was built from;
// the value itself is not stored yet.
class Token
{
public:
    Token(Operator)
        : m_type(TokenType::Operator)
    {
    }

    Token(double)
        : m_type(TokenType::Number)
    {
    }

    // Kind selected by the constructor that was used.
    TokenType Type() const
    {
        return m_type;
    }

private:
    TokenType m_type;
};
β¦ . . . .
.
// An operator token must convert back to the operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token)
{
    Token operatorToken(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, operatorToken);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token must convert back to the value it was built from.
TEST_METHOD(Should_get_number_value_from_number_token)
{
    Token numberToken(1.23);
    Assert::AreEqual<double>(1.23, numberToken);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
// A number with a fractional part must come back as one number token.
TEST_METHOD(Should_tokenize_floating_point_number)
{
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
// Produces at most one token: nothing for an empty expression, a number when
// the expression starts with a digit, otherwise the first character as an operator.
inline Tokens Tokenize(std::wstring expr)
{
    const wchar_t *cursor = expr.c_str();
    if(*cursor == L'\0')
    {
        return{};
    }
    if(!iswdigit(*cursor))
    {
        return{ static_cast<Operator>(*cursor) };
    }
    // _wtof converts the leading numeric part of the string to a double.
    return{ _wtof(cursor) };
}
. .
// An operator immediately followed by a number must yield two tokens in order.
TEST_METHOD(Should_tokenize_plus_and_number)
{
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
// Same single-token behaviour as before, restructured around a result vector
// so that the scan can later be turned into a loop.
inline Tokens Tokenize(std::wstring expr)
{
    Tokens tokens;
    const wchar_t *cursor = expr.c_str();
    if(*cursor != L'\0')
    {
        if(!iswdigit(*cursor))
        {
            tokens.push_back(static_cast<Operator>(*cursor));
        }
        else
        {
            tokens.push_back(_wtof(cursor));
        }
    }
    return tokens;
}
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
Interpreter.h #pragma once; #include <vector> #include <wchar.h> #include <algorithm> namespace Interpreter { enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', }; inline std::wstring ToString(const Operator &op) { return{ static_cast<wchar_t>(op) }; } enum class TokenType { Operator, Number }; inline std::wstring ToString(const TokenType &type) { switch(type) { case TokenType::Operator: return L"Operator"; case TokenType::Number: return L"Number"; default: throw std::out_of_range("TokenType"); } } class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } friend inline bool operator==(const Token &left, const Token &right) { if(left.m_type == right.m_type) { switch(left.m_type) { case Interpreter::TokenType::Operator: return left.m_operator == right.m_operator; case Interpreter::TokenType::Number: return left.m_number == right.m_number; default: throw std::out_of_range("TokenType"); } } return false; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: throw std::out_of_range("TokenType"); } } typedef std::vector<Token> Tokens; namespace Lexer { namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { 
ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); } } // namespace Lexer } // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once; #include <vector> namespace Interpreter { struct Token {}; typedef std::vector<Token> Tokens; namespace Lexer { inline Tokens Tokenize(std::string expr) { throw std::exception(); } } // namespace Lexer } // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} β nil) , . (nil β constant) . (constant β constant+) ( , ). (constant β scalar) , . (statement β statements) (break, continue, return ). (unconditional β if) . (scalar β array) / . (array β container) . (statement β recursion) . (if β while) . (expression β function) . (variable β assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
Interpreter.h #pragma once; #include <vector> #include <wchar.h> #include <algorithm> namespace Interpreter { enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', }; inline std::wstring ToString(const Operator &op) { return{ static_cast<wchar_t>(op) }; } enum class TokenType { Operator, Number }; inline std::wstring ToString(const TokenType &type) { switch(type) { case TokenType::Operator: return L"Operator"; case TokenType::Number: return L"Number"; default: throw std::out_of_range("TokenType"); } } class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } friend inline bool operator==(const Token &left, const Token &right) { if(left.m_type == right.m_type) { switch(left.m_type) { case Interpreter::TokenType::Operator: return left.m_operator == right.m_operator; case Interpreter::TokenType::Number: return left.m_number == right.m_number; default: throw std::out_of_range("TokenType"); } } return false; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: throw std::out_of_range("TokenType"); } } typedef std::vector<Token> Tokens; namespace Lexer { namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { 
ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); } } // namespace Lexer } // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type: carries no data at this stage.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub: always throws std::exception; no tokenization is implemented yet.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
// Returns an empty token list regardless of `expr`.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// The expression "+" should tokenize to a single Plus operator.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    AssertRange::AreEqual({ Operator::Plus }, Lexer::Tokenize(L"+"));
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// Operators known to the lexer; each enumerator's value is the wide character that spells it.
enum class Operator : wchar_t {
    Plus = L'+'
};

// At this stage a token is simply an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
// Returns a one-character string holding the token's underlying wide character.
inline std::wstring ToString(const Token &token) {
    return std::wstring(1, static_cast<wchar_t>(token));
}
, , . , (unconditional → if).
// Returns no tokens for an empty expression; otherwise a single token built
// from the first character of `expr`.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    if(!expr.empty()) {
        result.push_back(static_cast<Operator>(expr[0]));
    }
    return result;
}
. . . …
.
// The expression "1" should tokenize to the single number 1.0.
TEST_METHOD(Should_tokenize_single_digit) {
    AssertRange::AreEqual({ 1.0 }, Lexer::Tokenize(L"1"));
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double should report TokenType::Number.
TEST_METHOD(Should_get_type_for_number_token) {
    const Token numberToken(1.2);
    const TokenType actualType = numberToken.Type();
    Assert::AreEqual(TokenType::Number, actualType);
}
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
… . . . .
.
// An operator token should convert back to the operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    const Token plusToken(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plusToken);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token should convert back to the double it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) {
    const Token numberToken(1.23);
    Assert::AreEqual<double>(1.23, numberToken);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
// The expression "12.34" should tokenize to the single number 12.34.
TEST_METHOD(Should_tokenize_floating_point_number) {
    AssertRange::AreEqual({ 12.34 }, Lexer::Tokenize(L"12.34"));
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
// Produces at most one token: nothing for an empty expression, a number when
// the expression starts with a digit, otherwise an operator made from the
// first character.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *current = expr.c_str();
    if(*current == L'\0') {
        return Tokens();
    }
    const Token only = iswdigit(*current)
        ? Token(_wtof(current))
        : Token(static_cast<Operator>(*current));
    return Tokens(1, only);
}
. .
// "+12.34" should tokenize to a Plus operator followed by the number 12.34.
TEST_METHOD(Should_tokenize_plus_and_number) {
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, Lexer::Tokenize(L"+12.34"));
}
, . , . . result
, .
// Same single-token behaviour as before, now accumulating into `result`.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    const wchar_t *current = expr.c_str();
    if(*current != L'\0') {
        if(iswdigit(*current)) {
            result.push_back(_wtof(current));
        } else {
            result.push_back(static_cast<Operator>(*current));
        }
    }
    return result;
}
: (if → while). , .
// Scans the whole expression: a digit starts a number parsed by wcstod, and
// every other character becomes an operator token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    for(const wchar_t *current = expr.c_str(); *current != L'\0'; ) {
        if(!iswdigit(*current)) {
            result.push_back(static_cast<Operator>(*current));
            ++current;
            continue;
        }
        // wcstod reports through `next` where the number ended.
        wchar_t *next = nullptr;
        result.push_back(wcstod(current, &next));
        current = next;
    }
    return result;
}
wcstod
, _wtof
, . , . , .
. .
.
// Spaces around and between tokens should not produce tokens of their own.
TEST_METHOD(Should_skip_spaces) {
    AssertRange::AreEqual(
        { Token(1.0), Token(Operator::Plus), Token(12.34) },
        Lexer::Tokenize(L" 1 + 12.34 "));
}
(unconditional → if) , .
// Digits start a number, '+' becomes an operator token, and every other
// character is skipped.
while(*current != L'\0') {
    const wchar_t symbol = *current;
    if(iswdigit(symbol)) {
        wchar_t *next = nullptr;
        result.push_back(wcstod(current, &next));
        current = next;
    } else {
        if(symbol == static_cast<wchar_t>(Operator::Plus)) {
            result.push_back(static_cast<Operator>(symbol));
        }
        ++current;
    }
}
. . Detail
. Tokenize
.
// Runs a Detail::Tokenizer over `expr` and returns a copy of its result.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    const Tokens &tokens = scanner.Result();
    return tokens;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// An expression using every operator and parentheses should tokenize in source order.
TEST_METHOD(Should_tokenize_complex_experssion) {
    AssertRange::AreEqual(
        {
            Token(1), Token(Operator::Plus),
            Token(2), Token(Operator::Mul),
            Token(3), Token(Operator::Div),
            Token(Operator::LParen),
            Token(4), Token(Operator::Minus), Token(5),
            Token(Operator::RParen)
        },
        Lexer::Tokenize(L"1+2*3/(4-5)"));
}
Operator
, .
// Operators recognised by the lexer. Each enumerator's value is the wide
// character that spells it, so a character can be cast directly to Operator.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <wchar.h>
#include <wctype.h>

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>

namespace Interpreter {

// Operators recognised by the lexer. Each enumerator's value is the wide
// character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the one-character string spelling `op`.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Kinds of token the lexer can emit.
enum class TokenType {
    Operator,
    Number
};

// Returns the name of `type`; throws std::out_of_range for a value outside
// the enumeration.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// A lexer token holding either an operator or a number; Type() tells which.
class Token {
public:
    // Builds an operator token holding `op`.
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}

    // Builds a number token holding `num`.
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    // Returns which member of the union is active.
    TokenType Type() const { return m_type; }

    // Returns the stored operator; throws std::logic_error for a non-operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number; throws std::logic_error for a non-number token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same stored value.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Returns a printable form of `token`; throws std::out_of_range for an
// unknown token type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

// Walks a wide-character expression and collects its tokens.
class Tokenizer {
public:
    // Keeps a pointer into `expr`'s buffer, so `expr` must outlive this object.
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the expression up to its terminating null character. Characters
    // that are neither digits nor operators are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    // Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses a number starting at the current position.
    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // wcstod converted nothing (possible if iswdigit accepts a character
            // wcstod cannot parse). Skip it so Tokenize() always makes progress.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    // True when the current character spells one of the known operators.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits `expr` into number and operator tokens, in source order.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type: carries no data at this stage.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub: always throws std::exception; no tokenization is implemented yet.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <wchar.h>
#include <wctype.h>

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>

namespace Interpreter {

// Operators recognised by the lexer. Each enumerator's value is the wide
// character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the one-character string spelling `op`.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Kinds of token the lexer can emit.
enum class TokenType {
    Operator,
    Number
};

// Returns the name of `type`; throws std::out_of_range for a value outside
// the enumeration.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// A lexer token holding either an operator or a number; Type() tells which.
class Token {
public:
    // Builds an operator token holding `op`.
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}

    // Builds a number token holding `num`.
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    // Returns which member of the union is active.
    TokenType Type() const { return m_type; }

    // Returns the stored operator; throws std::logic_error for a non-operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number; throws std::logic_error for a non-number token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same stored value.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Returns a printable form of `token`; throws std::out_of_range for an
// unknown token type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

// Walks a wide-character expression and collects its tokens.
class Tokenizer {
public:
    // Keeps a pointer into `expr`'s buffer, so `expr` must outlive this object.
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the expression up to its terminating null character. Characters
    // that are neither digits nor operators are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    // Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses a number starting at the current position.
    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // wcstod converted nothing (possible if iswdigit accepts a character
            // wcstod cannot parse). Skip it so Tokenize() always makes progress.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    // True when the current character spells one of the known operators.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits `expr` into number and operator tokens, in source order.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type: carries no data at this stage.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub: always throws std::exception; no tokenization is implemented yet.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
/// Operators and parentheses known to the lexer. Each enumerator's value is
/// the wide character that spells it in an expression.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <vector>
#include <wchar.h>
#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>

namespace Interpreter {

/// Operators and parentheses known to the lexer. Each enumerator's value is
/// the wide character that spells it in an expression.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character spelling of op.
inline std::wstring ToString(const Operator &op) {
    return std::wstring(1, static_cast<wchar_t>(op));
}

/// Tells which member of Token's internal union is active.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator name of type.
/// @throws std::out_of_range if type holds a value that is not an enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

/// A lexical token holding either an Operator or a number.
/// Constructors and conversions are intentionally implicit: the tokenizer
/// pushes raw doubles/operators and the tests compare tokens with them.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// @throws std::logic_error if the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator) {
            throw std::logic_error("Should be operator token.");
        }
        return m_operator;
    }

    /// @throws std::logic_error if the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number) {
            throw std::logic_error("Should be number token.");
        }
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type != right.m_type) {
            return false;
        }
        switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
        }
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is ever read.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Renders a token as text: the decimal value of a number or the spelling of
/// an operator.
/// @throws std::out_of_range if the token reports an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

/// Scanner that walks a wide-character expression once and collects tokens.
/// It keeps only a pointer into the string passed to the constructor, so that
/// string has to stay alive while Tokenize() runs.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the expression: numbers and operators become tokens, every
    /// other character (for example a space) is skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            } else if(IsOperator()) {
                ScanOperator();
            } else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    // Explicit ASCII range instead of iswdigit: ScanNumber relies on wcstod
    // consuming at least one character, which is only guaranteed for 0-9.
    // NOTE(review): some C runtimes reportedly classify other Unicode digits
    // as digits in iswdigit, which would stall the scan loop.
    bool IsNumber() const { return *m_current >= L'0' && *m_current <= L'9'; }

    /// Parses a number at the current position and continues right after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        const wchar_t symbol = *m_current;
        return std::any_of(all.begin(), all.end(), [symbol](Operator o) { return symbol == static_cast<wchar_t>(o); });
    }

    /// Stores the current character as an operator token and steps past it.
    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits expr into number and operator tokens, ignoring any other characters.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <vector>
#include <exception>
#include <string>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

/// Stub with no tokenizing logic yet: every call throws.
/// @throws std::exception always.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
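({} → nil) no code at all → code that employs nil. (nil → constant). (constant → constant+) a simple constant to a more complex constant. (constant → scalar) replacing a constant with a variable or an argument. (statement → statements) adding more unconditional statements (break, continue, return, etc.). (unconditional → if) splitting the execution path. (scalar → array). (array → container). (statement → recursion). (if → while). (expression → function) replacing an expression with a function or algorithm. (variable → assignment) replacing the value of a variable.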
.
/// Returns an empty token list regardless of expr.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) {
    // A lone "+" must produce exactly one plus token.
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
/// Operators known to the lexer; each value is the operator's character code.
enum class Operator : wchar_t {
    Plus = L'+',
};

/// At this stage a token is nothing more than an operator.
typedef Operator Token;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
/// Returns a one-character string holding the character code of token.
inline std::wstring ToString(const Token &token) {
    return std::wstring(1, static_cast<wchar_t>(token));
}
, , . , (unconditional → if).
/// Returns no tokens for an empty expression; otherwise a single token made
/// from the first character interpreted as an Operator.
inline Tokens Tokenize(std::wstring expr) {
    if(!expr.empty()) {
        return{ static_cast<Operator>(expr[0]) };
    }
    return{};
}
. . . …
.
TEST_METHOD(Should_tokenize_single_digit) {
    // A lone digit must produce exactly one number token.
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) {
    // A token built from a double must report the Number type.
    const Token numberToken(1.2);
    Assert::AreEqual(TokenType::Number, numberToken.Type());
}
. (constant → scalar) .
/// Token that remembers only which kind of value it was constructed from.
class Token {
public:
    Token(Operator) : m_type(TokenType::Operator) {}
    Token(double) : m_type(TokenType::Number) {}

    /// Kind selected by the constructor that was used.
    TokenType Type() const {
        return m_type;
    }

private:
    TokenType m_type;
};
… . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    // Converting an operator token back must give the original operator.
    const Token plusToken(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plusToken);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) {
    // Converting a number token back must give the original value.
    const Token numberToken(1.23);
    Assert::AreEqual<double>(1.23, numberToken);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
TEST_METHOD(Should_tokenize_floating_point_number) {
    // Digits with a decimal point must form a single number token.
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
/// Produces at most one token from the start of expr: nothing for an empty
/// string, a number (via _wtof) when it starts with a digit, otherwise the
/// first character as an Operator.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *const first = expr.c_str();
    if(*first == L'\0') {
        return{};
    }
    if(!iswdigit(*first)) {
        return{ static_cast<Operator>(*first) };
    }
    return{ _wtof(first) };
}
. .
TEST_METHOD(Should_tokenize_plus_and_number) {
    // An operator directly followed by a number must give two tokens.
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
/// Collects at most one token from the start of expr into a result list:
/// a number (via _wtof) when it starts with a digit, otherwise the first
/// character as an Operator. An empty expr yields an empty list.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    const wchar_t *const first = expr.c_str();
    if(*first != L'\0') {
        if(!iswdigit(*first)) {
            result.push_back(static_cast<Operator>(*first));
        } else {
            result.push_back(_wtof(first));
        }
    }
    return result;
}
: (if → while). , .
/// Splits expr into tokens in one pass: a run that starts with a digit is
/// parsed as a number by wcstod, and any other character is stored as an
/// Operator carrying that character code.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    const wchar_t *current = expr.c_str();
    while(*current != L'\0') {
        if(!iswdigit(*current)) {
            result.push_back(static_cast<Operator>(*current));
            ++current;
            continue;
        }
        // wcstod reports where the number stopped; scanning resumes there.
        wchar_t *end = nullptr;
        result.push_back(wcstod(current, &end));
        current = end;
    }
    return result;
}
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) {
    // Blanks before, between and after the tokens must not yield tokens.
    const Tokens actual = Lexer::Tokenize(L" 1 + 12.34 ");
    AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, actual);
}
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
/// Runs a Detail::Tokenizer over expr and returns a copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    return scanner.Result();
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) {
    // Each operator and both parentheses occur once, with no whitespace.
    const Tokens actual = Lexer::Tokenize(L"1+2*3/(4-5)");
    AssertRange::AreEqual({
        Token(1), Token(Operator::Plus),
        Token(2), Token(Operator::Mul),
        Token(3), Token(Operator::Div),
        Token(Operator::LParen),
        Token(4), Token(Operator::Minus), Token(5),
        Token(Operator::RParen)
    }, actual);
}
Operator
, .
/// Operators and parentheses known to the lexer. Each enumerator's value is
/// the wide character that spells it in an expression.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <vector>
#include <wchar.h>
#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>

namespace Interpreter {

/// Operators and parentheses known to the lexer. Each enumerator's value is
/// the wide character that spells it in an expression.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character spelling of op.
inline std::wstring ToString(const Operator &op) {
    return std::wstring(1, static_cast<wchar_t>(op));
}

/// Tells which member of Token's internal union is active.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator name of type.
/// @throws std::out_of_range if type holds a value that is not an enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

/// A lexical token holding either an Operator or a number.
/// Constructors and conversions are intentionally implicit: the tokenizer
/// pushes raw doubles/operators and the tests compare tokens with them.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// @throws std::logic_error if the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator) {
            throw std::logic_error("Should be operator token.");
        }
        return m_operator;
    }

    /// @throws std::logic_error if the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number) {
            throw std::logic_error("Should be number token.");
        }
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type != right.m_type) {
            return false;
        }
        switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
        }
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is ever read.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Renders a token as text: the decimal value of a number or the spelling of
/// an operator.
/// @throws std::out_of_range if the token reports an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

/// Scanner that walks a wide-character expression once and collects tokens.
/// It keeps only a pointer into the string passed to the constructor, so that
/// string has to stay alive while Tokenize() runs.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the expression: numbers and operators become tokens, every
    /// other character (for example a space) is skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            } else if(IsOperator()) {
                ScanOperator();
            } else {
                MoveNext();
            }
        }
    }

    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    // Explicit ASCII range instead of iswdigit: ScanNumber relies on wcstod
    // consuming at least one character, which is only guaranteed for 0-9.
    // NOTE(review): some C runtimes reportedly classify other Unicode digits
    // as digits in iswdigit, which would stall the scan loop.
    bool IsNumber() const { return *m_current >= L'0' && *m_current <= L'9'; }

    /// Parses a number at the current position and continues right after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        const wchar_t symbol = *m_current;
        return std::any_of(all.begin(), all.end(), [symbol](Operator o) { return symbol == static_cast<wchar_t>(o); });
    }

    /// Stores the current character as an operator token and steps past it.
    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits expr into number and operator tokens, ignoring any other characters.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <vector>
#include <exception>
#include <string>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

/// Stub with no tokenizing logic yet: every call throws.
/// @throws std::exception always.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
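({} → nil) no code at all → code that employs nil. (nil → constant). (constant → constant+) a simple constant to a more complex constant. (constant → scalar) replacing a constant with a variable or an argument. (statement → statements) adding more unconditional statements (break, continue, return, etc.). (unconditional → if) splitting the execution path. (scalar → array). (array → container). (statement → recursion). (if → while). (expression → function) replacing an expression with a function or algorithm. (variable → assignment) replacing the value of a variable.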
.
/// Returns an empty token list regardless of expr.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) {
    // A lone "+" must produce exactly one plus token.
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
/// Operators known to the lexer; each value is the operator's character code.
enum class Operator : wchar_t {
    Plus = L'+',
};

/// At this stage a token is nothing more than an operator.
typedef Operator Token;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
/// Returns a one-character string holding the character code of token.
inline std::wstring ToString(const Token &token) {
    return std::wstring(1, static_cast<wchar_t>(token));
}
, , . , (unconditional → if).
/// Returns no tokens for an empty expression; otherwise a single token made
/// from the first character interpreted as an Operator.
inline Tokens Tokenize(std::wstring expr) {
    if(!expr.empty()) {
        return{ static_cast<Operator>(expr[0]) };
    }
    return{};
}
. . . …
.
TEST_METHOD(Should_tokenize_single_digit) {
    // A lone digit must produce exactly one number token.
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) {
    // A token built from a double must report the Number type.
    const Token numberToken(1.2);
    Assert::AreEqual(TokenType::Number, numberToken.Type());
}
. (constant → scalar) .
/// Token that remembers only which kind of value it was constructed from.
class Token {
public:
    Token(Operator) : m_type(TokenType::Operator) {}
    Token(double) : m_type(TokenType::Number) {}

    /// Kind selected by the constructor that was used.
    TokenType Type() const {
        return m_type;
    }

private:
    TokenType m_type;
};
… . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    // Converting an operator token back must give the original operator.
    const Token plusToken(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plusToken);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) {
    // Converting a number token back must give the original value.
    const Token numberToken(1.23);
    Assert::AreEqual<double>(1.23, numberToken);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
TEST_METHOD(Should_tokenize_floating_point_number) {
    // Digits with a decimal point must form a single number token.
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
/// Produces at most one token from the start of expr: nothing for an empty
/// string, a number (via _wtof) when it starts with a digit, otherwise the
/// first character as an Operator.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *const first = expr.c_str();
    if(*first == L'\0') {
        return{};
    }
    if(!iswdigit(*first)) {
        return{ static_cast<Operator>(*first) };
    }
    return{ _wtof(first) };
}
. .
TEST_METHOD(Should_tokenize_plus_and_number) {
    // An operator directly followed by a number must give two tokens.
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
/// Collects at most one token from the start of expr into a result list:
/// a number (via _wtof) when it starts with a digit, otherwise the first
/// character as an Operator. An empty expr yields an empty list.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    const wchar_t *const first = expr.c_str();
    if(*first != L'\0') {
        if(!iswdigit(*first)) {
            result.push_back(static_cast<Operator>(*first));
        } else {
            result.push_back(_wtof(first));
        }
    }
    return result;
}
: (if → while). , .
/// Splits expr into tokens in one pass: a run that starts with a digit is
/// parsed as a number by wcstod, and any other character is stored as an
/// Operator carrying that character code.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    const wchar_t *current = expr.c_str();
    while(*current != L'\0') {
        if(!iswdigit(*current)) {
            result.push_back(static_cast<Operator>(*current));
            ++current;
            continue;
        }
        // wcstod reports where the number stopped; scanning resumes there.
        wchar_t *end = nullptr;
        result.push_back(wcstod(current, &end));
        current = end;
    }
    return result;
}
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) {
    // Blanks before, between and after the tokens must not yield tokens.
    const Tokens actual = Lexer::Tokenize(L" 1 + 12.34 ");
    AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, actual);
}
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
/// Runs a Detail::Tokenizer over expr and returns a copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    return scanner.Result();
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) {
    // Each operator and both parentheses occur once, with no whitespace.
    const Tokens actual = Lexer::Tokenize(L"1+2*3/(4-5)");
    AssertRange::AreEqual({
        Token(1), Token(Operator::Plus),
        Token(2), Token(Operator::Mul),
        Token(3), Token(Operator::Div),
        Token(Operator::LParen),
        Token(4), Token(Operator::Minus), Token(5),
        Token(Operator::RParen)
    }, actual);
}
Operator
, .
/// Operators and parentheses known to the lexer. Each enumerator's value is
/// the wide character that spells it in an expression.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once
#include <vector>
#include <wchar.h>
#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>

namespace Interpreter {

/// Operator and parenthesis kinds; each enumerator's value is the wide
/// character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Renders an operator as the one-character string holding its symbol.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

/// Discriminator for the two kinds of token.
enum class TokenType {
    Operator,
    Number
};

/// Returns the name of a token type.
/// @throws std::out_of_range for a value that is not a declared enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

/// A token is either an operator or a number. The constructors are implicit on
/// purpose: the lexer pushes raw doubles and Operator values into a Tokens list.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// @throws std::logic_error when the token holds a number.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error when the token holds an operator.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is ever written or read.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Renders a token: numbers through std::to_wstring, operators as their symbol.
/// @throws std::out_of_range for an unknown token type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

/// Single-pass scanner over a null-terminated wide string. It collects number
/// and operator tokens and skips every other character.
/// Only a pointer into the string given to the constructor is stored, so that
/// string has to stay alive while Tokenize() runs.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the whole expression, appending tokens to the result list.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    /// A number starts with an ASCII digit. The range is spelled out instead of
    /// calling iswdigit: a classifier that may accept digits wcstod cannot parse
    /// would leave ScanNumber without progress and the loop would never end.
    bool IsNumber() const { return *m_current >= L'0' && *m_current <= L'9'; }

    /// Parses the number at the current position and moves just past it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    /// True when the current character is the symbol of a known operator.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits `expr` into number and operator tokens; unrecognised characters
/// (spaces included) are skipped.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once
#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Stub: unconditionally throws std::exception and never returns a token list.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
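({} → nil) no code at all becomes code that employs nil. (nil → constant) nil becomes a constant. (constant → constant+) a simple constant becomes a more complex constant. (constant → scalar) a constant is replaced with a variable or an argument. (statement → statements) more unconditional statements are added (break, continue, return and the like). (unconditional → if) the execution path is split. (scalar → array) a variable or argument becomes an array. (array → container) an array becomes a container. (statement → recursion) a statement is replaced with recursion. (if → while) a conditional becomes a loop. (expression → function) an expression is replaced with a function or algorithm. (variable → assignment) the value of a variable is replaced.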
.
// Ignores its argument and always returns an empty token list.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// The input "+" must produce exactly one token, Operator::Plus.
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// The only operator so far is '+', stored as its wide-character code.
// At this step a token is nothing more than an operator, hence the alias.
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
/// Renders a token as the one-character string made of its character code.
inline std::wstring ToString(const Token &token) {
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
/// Returns an empty list for an empty expression; otherwise a single operator
/// token made from the first character.
inline Tokens Tokenize(std::wstring expr) {
    return expr.empty()
        ? Tokens{}
        : Tokens{ static_cast<Operator>(expr[0]) };
}
. . . β¦
.
// The input "1" must produce exactly one token equal to the number 1.0.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double must report TokenType::Number.
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
// Token that remembers only its kind: the constructor used decides the stored
// TokenType, and the operator or number value itself is not kept yet.
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
// An operator token must convert back to the Operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token must convert back to the double it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
// The input "12.34" must produce exactly one token equal to the number 12.34.
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
// Produces at most one token from the start of the expression: nothing for an
// empty string, a number when the first character is a digit, otherwise an
// operator made from the first character.
// NOTE(review): _wtof looks like a Microsoft CRT function rather than standard C++ - confirm before building elsewhere.
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
// The input "+12.34" must produce two tokens: Operator::Plus, then the number 12.34.
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
/// Collects at most one token from the start of the expression into a result
/// list: a number when the first character is a digit, otherwise an operator
/// made from that character. An empty expression yields an empty list.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    const wchar_t *first = expr.c_str();
    if(*first) {
        if(iswdigit(*first)) {
            tokens.push_back(_wtof(first));
        }
        else {
            tokens.push_back(static_cast<Operator>(*first));
        }
    }
    return tokens;
}
: (if → while). , .
/// Splits an expression into tokens. A character that is a digit starts a
/// number, which is parsed with wcstod; every other character is stored as an
/// operator token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    for(const wchar_t *cursor = expr.c_str(); *cursor; ) {
        if(!iswdigit(*cursor)) {
            tokens.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        // wcstod reports where the number ended, so scanning resumes right after it.
        wchar_t *numberEnd = nullptr;
        tokens.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
Interpreter.h #pragma once; #include <vector> #include <wchar.h> #include <algorithm> namespace Interpreter { enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', }; inline std::wstring ToString(const Operator &op) { return{ static_cast<wchar_t>(op) }; } enum class TokenType { Operator, Number }; inline std::wstring ToString(const TokenType &type) { switch(type) { case TokenType::Operator: return L"Operator"; case TokenType::Number: return L"Number"; default: throw std::out_of_range("TokenType"); } } class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } friend inline bool operator==(const Token &left, const Token &right) { if(left.m_type == right.m_type) { switch(left.m_type) { case Interpreter::TokenType::Operator: return left.m_operator == right.m_operator; case Interpreter::TokenType::Number: return left.m_number == right.m_number; default: throw std::out_of_range("TokenType"); } } return false; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: throw std::out_of_range("TokenType"); } } typedef std::vector<Token> Tokens; namespace Lexer { namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { 
ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); } } // namespace Lexer } // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once
#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Stub: unconditionally throws std::exception and never returns a token list.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} β nil) , . (nil β constant) . (constant β constant+) ( , ). (constant β scalar) , . (statement β statements) (break, continue, return ). (unconditional β if) . (scalar β array) / . (array β container) . (statement β recursion) . (if β while) . (expression β function) . (variable β assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
Interpreter.h #pragma once; #include <vector> #include <wchar.h> #include <algorithm> namespace Interpreter { enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', }; inline std::wstring ToString(const Operator &op) { return{ static_cast<wchar_t>(op) }; } enum class TokenType { Operator, Number }; inline std::wstring ToString(const TokenType &type) { switch(type) { case TokenType::Operator: return L"Operator"; case TokenType::Number: return L"Number"; default: throw std::out_of_range("TokenType"); } } class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } friend inline bool operator==(const Token &left, const Token &right) { if(left.m_type == right.m_type) { switch(left.m_type) { case Interpreter::TokenType::Operator: return left.m_operator == right.m_operator; case Interpreter::TokenType::Number: return left.m_number == right.m_number; default: throw std::out_of_range("TokenType"); } } return false; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: throw std::out_of_range("TokenType"); } } typedef std::vector<Token> Tokens; namespace Lexer { namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { 
ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); } } // namespace Lexer } // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data.
struct Token {};

/// Sequence of tokens returned by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Not implemented: unconditionally throws.
/// @param expr expression text (unused).
/// @throws std::exception on every call.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// Operators known so far; the enumerator's value is the operator's own character.
enum class Operator : wchar_t {
    Plus = L'+',
};

// At this stage a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
// Scans `expr` left to right. A run starting with a digit is parsed by wcstod
// into a number token; every other character is stored as an operator token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens parsed;
    const wchar_t *cursor = expr.c_str();
    while(*cursor) {
        if(!iswdigit(*cursor)) {
            parsed.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        // wcstod reports where the number ends, so scanning resumes right after it.
        wchar_t *numberEnd = nullptr;
        parsed.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;
    }
    return parsed;
}
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
// Runs a Detail::Tokenizer over `expr` and returns a copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    Tokens collected = scanner.Result();
    return collected;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
// All operators the lexer knows; each enumerator's value is the operator's own character.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <cwchar>
#include <cwctype>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>

namespace Interpreter {

/// Operators recognised by the lexer; each enumerator's value is the operator's own character.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// One-character string holding the operator's symbol.
inline std::wstring ToString(const Operator &op) {
    return std::wstring(1, static_cast<wchar_t>(op));
}

/// Discriminates what a Token holds.
enum class TokenType {
    Operator,
    Number
};

/// Name of the token type.
/// @throws std::out_of_range for a value that is not a declared enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

/// Tagged value holding either an operator or a number.
/// Constructors and conversions are implicit, so an Operator or a double
/// can be used wherever a Token is expected and vice versa.
class Token {
public:
    /// Builds an operator token.
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    /// Builds a number token.
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    /// Kind of value stored in this token.
    TokenType Type() const { return m_type; }

    /// @return the stored operator.
    /// @throws std::logic_error if the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @return the stored number.
    /// @throws std::logic_error if the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same stored value.
    /// Numbers are compared exactly with ==.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Text form of a token: the number via std::to_wstring, or the operator's symbol.
/// @throws std::out_of_range if the token type is not a declared enumerator.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

/// Sequence of tokens returned by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

/// Walks a null-terminated wide string and collects number and operator tokens.
/// Only a pointer into `expr` is stored, so the string must stay alive
/// for as long as Tokenize() may be called.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the expression up to the first null character.
    /// Characters that are neither digits nor known operators are skipped.
    void Tokenize() {
        while(!EndOfExperssion()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExperssion() const { return *m_current == L'\0'; }

    bool IsNumber() const { return std::iswdigit(*m_current) != 0; }

    // wcstod parses the number and reports where it ends; scanning resumes there.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(std::wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits `expr` into number and operator tokens.
/// @param expr expression text; unrecognised characters (e.g. spaces) are ignored.
/// @return the tokens in the order they appear in `expr`.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data.
struct Token {};

/// Sequence of tokens returned by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Not implemented: unconditionally throws.
/// @param expr expression text (unused).
/// @throws std::exception on every call.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <cwchar>
#include <cwctype>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>

namespace Interpreter {

/// Operators recognised by the lexer; each enumerator's value is the operator's own character.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// One-character string holding the operator's symbol.
inline std::wstring ToString(const Operator &op) {
    return std::wstring(1, static_cast<wchar_t>(op));
}

/// Discriminates what a Token holds.
enum class TokenType {
    Operator,
    Number
};

/// Name of the token type.
/// @throws std::out_of_range for a value that is not a declared enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

/// Tagged value holding either an operator or a number.
/// Constructors and conversions are implicit, so an Operator or a double
/// can be used wherever a Token is expected and vice versa.
class Token {
public:
    /// Builds an operator token.
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    /// Builds a number token.
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    /// Kind of value stored in this token.
    TokenType Type() const { return m_type; }

    /// @return the stored operator.
    /// @throws std::logic_error if the token does not hold an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @return the stored number.
    /// @throws std::logic_error if the token does not hold a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same stored value.
    /// Numbers are compared exactly with ==.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Text form of a token: the number via std::to_wstring, or the operator's symbol.
/// @throws std::out_of_range if the token type is not a declared enumerator.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

/// Sequence of tokens returned by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

/// Walks a null-terminated wide string and collects number and operator tokens.
/// Only a pointer into `expr` is stored, so the string must stay alive
/// for as long as Tokenize() may be called.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the expression up to the first null character.
    /// Characters that are neither digits nor known operators are skipped.
    void Tokenize() {
        while(!EndOfExperssion()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExperssion() const { return *m_current == L'\0'; }

    bool IsNumber() const { return std::iswdigit(*m_current) != 0; }

    // wcstod parses the number and reports where it ends; scanning resumes there.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(std::wcstod(m_current, &end));
        m_current = end;
    }

    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Splits `expr` into number and operator tokens.
/// @param expr expression text; unrecognised characters (e.g. spaces) are ignored.
/// @return the tokens in the order they appear in `expr`.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data.
struct Token {};

/// Sequence of tokens returned by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

/// Not implemented: unconditionally throws.
/// @param expr expression text (unused).
/// @throws std::exception on every call.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
// Operators known to the lexer. Each enumerator's value is the wide character
// that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <cwctype>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>

namespace Interpreter {

// Operators known to the lexer. Each enumerator's value is the wide character
// that spells it, so a character and an Operator convert with a plain cast.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the one-character string that spells the operator.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Tells which kind of payload a Token carries.
enum class TokenType {
    Operator,
    Number
};

// Returns the enumerator name; throws std::out_of_range for any other value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// A lexical token holding either an operator or a number.
// The converting constructors and conversion operators are deliberately
// implicit: the tokenizer pushes raw doubles and Operators into a Tokens
// vector and relies on these conversions.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}

    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Returns the stored operator; throws std::logic_error for a number token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number; throws std::logic_error for an operator token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is ever initialized and read.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Converts a token to text: std::to_wstring for numbers, the operator
// character for operators; throws std::out_of_range for an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

// Scanner that walks a null-terminated wide string once and collects tokens.
// Only a raw pointer into the string given to the constructor is kept, so that
// string has to outlive the call to Tokenize().
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the whole expression: numbers and known operators become
    // tokens, every other character is skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            } else if(IsOperator()) {
                ScanOperator();
            } else {
                MoveNext();
            }
        }
    }

    // Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses a number with wcstod and jumps to the character right after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    // True when the current character equals the code of any known operator.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    // Stores the current character as an operator token and advances.
    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits the expression into number and operator tokens and returns them in
// source order.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp
#include "stdafx.h"
#include "CppUnitTest.h"
#include "Interpreter.h"

namespace InterpreterTests {
using namespace Microsoft::VisualStudio::CppUnitTestFramework;
using namespace Interpreter;
using namespace std;

namespace AssertRange {
// Asserts that `actual` holds as many elements as `expect` and that the
// elements match pairwise; a mismatch message names the failing position.
// Elements are compared through Assert::AreEqual<T>, i.e. as the expected type.
template<class T, class ActualRange>
static void AreEqual(initializer_list<T> expect, const ActualRange &actual) {
    auto actualIter = begin(actual);
    auto expectIter = begin(expect);
    Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs.");
    for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) {
        auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter));
        Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str());
    }
}
} // namespace AssertRange

// Behaviour of Lexer::Tokenize.
TEST_CLASS(LexerTests) {
public:
    TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) {
        Tokens tokens = Lexer::Tokenize(L"");
        Assert::IsTrue(tokens.empty());
    }
    TEST_METHOD(Should_tokenize_single_plus_operator) {
        Tokens tokens = Lexer::Tokenize(L"+");
        AssertRange::AreEqual({ Operator::Plus }, tokens);
    }
    TEST_METHOD(Should_tokenize_single_digit) {
        Tokens tokens = Lexer::Tokenize(L"1");
        AssertRange::AreEqual({ 1.0 }, tokens);
    }
    TEST_METHOD(Should_tokenize_floating_point_number) {
        Tokens tokens = Lexer::Tokenize(L"12.34");
        AssertRange::AreEqual({ 12.34 }, tokens);
    }
    TEST_METHOD(Should_tokenize_plus_and_number) {
        Tokens tokens = Lexer::Tokenize(L"+12.34");
        AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens);
    }
    TEST_METHOD(Should_skip_spaces) {
        Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 ");
        AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens);
    }
    TEST_METHOD(Should_tokenize_complex_experssion) {
        Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)");
        AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens);
    }
};

// Behaviour of the Token value type.
TEST_CLASS(TokenTests) {
public:
    TEST_METHOD(Should_get_type_for_operator_token) {
        Token opToken(Operator::Plus);
        Assert::AreEqual(TokenType::Operator, opToken.Type());
    }
    TEST_METHOD(Should_get_type_for_number_token) {
        Token numToken(1.2);
        Assert::AreEqual(TokenType::Number, numToken.Type());
    }
    TEST_METHOD(Should_get_operator_code_from_operator_token) {
        Token token(Operator::Plus);
        Assert::AreEqual<Operator>(Operator::Plus, token);
    }
    TEST_METHOD(Should_get_number_value_from_number_token) {
        Token token(1.23);
        Assert::AreEqual<double>(1.23, token);
    }
};
}
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type: it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub implementation: always throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
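({} → nil) no code at all → code that employs nil. (nil → constant) nil → a constant. (constant → constant+) a simple constant → a more complex constant (for example, a one-character string → a longer string). (constant → scalar) replacing a constant with a variable or an argument. (statement → statements) adding more unconditional statements (break, continue, return and the like). (unconditional → if) splitting the execution path. (scalar → array) replacing a variable/argument with an array. (array → container) replacing an array with a container. (statement → recursion) replacing a statement with recursion. (if → while) replacing a condition with a loop. (expression → function) replacing an expression with a function or algorithm. (variable → assignment) replacing the value of a variable.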
.
// Returns an empty token list regardless of the input.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone "+" must yield exactly one token equal to Operator::Plus.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    Tokens tokens = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, tokens);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// The only operator so far; its value is the wide character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
};
// For now a token is nothing more than an operator.
typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &)
// Returns a one-character string holding the token's character code.
inline std::wstring ToString(const Token &token) {
    return{ static_cast<wchar_t>(token) };
}
, , . , (unconditional → if).
// An empty expression gives no tokens; otherwise the first character is
// returned as a single operator token.
inline Tokens Tokenize(std::wstring expr) {
    return expr.empty()
        ? Tokens{}
        : Tokens{ static_cast<Operator>(expr[0]) };
}
. . . β¦
.
// A single digit must yield one token equal to the number 1.0.
TEST_METHOD(Should_tokenize_single_digit) {
    Tokens tokens = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, tokens);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double must report TokenType::Number.
TEST_METHOD(Should_get_type_for_number_token) {
    Token numToken(1.2);
    Assert::AreEqual(TokenType::Number, numToken.Type());
}
. (constant → scalar) .
// Token that remembers only its type; the constructor argument selects the
// type and its value is not stored.
class Token {
public:
    Token(Operator) :m_type(TokenType::Operator) {}
    Token(double) :m_type(TokenType::Number) {}
    TokenType Type() const { return m_type; }
private:
    TokenType m_type;
};
β¦ . . . .
.
// An operator token must convert back to the operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    Token token(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, token);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token must convert back to the double it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) {
    Token token(1.23);
    Assert::AreEqual<double>(1.23, token);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
// A single decimal digit becomes a number token holding the digit's value.
if(expr[0] >= '0' && expr[0] <= '9') {
    return{ (double) expr[0] - '0' };
}
// Anything else is returned as an operator token.
return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
// A number with several digits and a decimal point must yield one token.
TEST_METHOD(Should_tokenize_floating_point_number) {
    Tokens tokens = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, tokens);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
// Produces at most one token from the start of the expression: nothing for an
// empty string, a number parsed by _wtof when it starts with a digit, and an
// operator otherwise.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *const first = expr.c_str();
    if(*first == L'\0') {
        return{};
    }
    if(iswdigit(*first)) {
        return{ _wtof(first) };
    }
    return{ static_cast<Operator>(*first) };
}
. .
// An operator immediately followed by a number must yield two tokens.
TEST_METHOD(Should_tokenize_plus_and_number) {
    Tokens tokens = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens);
}
, . , . . result
, .
// Collects at most one token from the start of the expression into a list:
// a number parsed by _wtof when it starts with a digit, an operator otherwise,
// and nothing for an empty string.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    const wchar_t *const first = expr.c_str();
    if(*first != L'\0') {
        if(iswdigit(*first)) {
            tokens.push_back(_wtof(first));
        } else {
            tokens.push_back(static_cast<Operator>(*first));
        }
    }
    return tokens;
}
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <cwctype>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>

namespace Interpreter {

// Operators known to the lexer. Each enumerator's value is the wide character
// that spells it, so a character and an Operator convert with a plain cast.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the one-character string that spells the operator.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Tells which kind of payload a Token carries.
enum class TokenType {
    Operator,
    Number
};

// Returns the enumerator name; throws std::out_of_range for any other value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// A lexical token holding either an operator or a number.
// The converting constructors and conversion operators are deliberately
// implicit: the tokenizer pushes raw doubles and Operators into a Tokens
// vector and relies on these conversions.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}

    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Returns the stored operator; throws std::logic_error for a number token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number; throws std::logic_error for an operator token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is ever initialized and read.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Converts a token to text: std::to_wstring for numbers, the operator
// character for operators; throws std::out_of_range for an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

// Scanner that walks a null-terminated wide string once and collects tokens.
// Only a raw pointer into the string given to the constructor is kept, so that
// string has to outlive the call to Tokenize().
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the whole expression: numbers and known operators become
    // tokens, every other character is skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            } else if(IsOperator()) {
                ScanOperator();
            } else {
                MoveNext();
            }
        }
    }

    // Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses a number with wcstod and jumps to the character right after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    // True when the current character equals the code of any known operator.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    // Stores the current character as an operator token and advances.
    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits the expression into number and operator tokens and returns them in
// source order.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type: it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

// Stub implementation: always throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
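({} → nil) no code at all → code that employs nil. (nil → constant) nil → a constant. (constant → constant+) a simple constant → a more complex constant (for example, a one-character string → a longer string). (constant → scalar) replacing a constant with a variable or an argument. (statement → statements) adding more unconditional statements (break, continue, return and the like). (unconditional → if) splitting the execution path. (scalar → array) replacing a variable/argument with an array. (array → container) replacing an array with a container. (statement → recursion) replacing a statement with recursion. (if → while) replacing a condition with a loop. (expression → function) replacing an expression with a function or algorithm. (variable → assignment) replacing the value of a variable.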
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators and parentheses the lexer recognises. Each enumerator's value is
// the wide character that spells it, so a character can be cast to Operator.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the operator as a one-character string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Discriminates the two kinds of value a Token can hold.
enum class TokenType {
    Operator,
    Number
};

// Returns the enumerator's name; throws std::out_of_range for any other value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// Tagged union holding either an Operator or a double. Construction from both
// payload types and conversion back to them are implicit.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    // Which of the two payloads is stored.
    TokenType Type() const { return m_type; }

    // Returns the stored operator; throws std::logic_error for a number token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number; throws std::logic_error for an operator token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Renders a token: numbers via std::to_wstring, operators via ToString(Operator).
// Throws std::out_of_range if the token reports an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Walks a null-terminated wide string and collects the tokens it finds.
// Only a pointer into `expr` is stored, so the string passed to the
// constructor must stay alive while the tokenizer is in use.
class Tokenizer {
public:
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the whole expression. Characters that start neither a number
    // nor an operator (spaces, for instance) are skipped without a token.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    // Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    // True once the terminating L'\0' has been reached.
    bool EndOfExpression() const { return *m_current == L'\0'; }

    // True when the current character is a digit according to iswdigit().
    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Converts the number at the current position with wcstod() and moves
    // the cursor to the first character after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    // True when the current character is the value of an Operator enumerator.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    // Stores the current character as an Operator and steps past it.
    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits `expr` into number and operator tokens and returns them in order.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Empty placeholder token type.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

// Not implemented yet: unconditionally throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
// Returns an empty token list regardless of the input.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone "+" must yield exactly one token, Operator::Plus.
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// Operators known to the lexer; the enumerator's value is the wide character
// that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
};

// Token is an alias for Operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
// Returns a one-character string holding the token's value cast to wchar_t.
inline std::wstring ToString(const Token &token) {
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
// Returns no tokens for an empty string; otherwise a single token made by
// casting the first character to Operator. Later characters are ignored.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    if(!expr.empty()) {
        tokens.push_back(static_cast<Operator>(expr.front()));
    }
    return tokens;
}
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
// Produces at most one token from the start of `expr`: nothing for an empty
// string, a number converted with _wtof() when the first character is a
// digit, otherwise the first character cast to Operator.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *const first = expr.c_str();
    if(*first == L'\0') {
        return Tokens();
    }

    Tokens tokens;
    if(iswdigit(*first)) {
        tokens.push_back(_wtof(first));
    }
    else {
        tokens.push_back(static_cast<Operator>(*first));
    }
    return tokens;
}
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
// Collects at most one token from the start of `expr` into a result list:
// a number converted with _wtof() when the first character is a digit,
// otherwise the first character cast to Operator. An empty string gives an
// empty list.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    const wchar_t *const start = expr.c_str();
    if(*start != L'\0') {
        if(iswdigit(*start))
            tokens.push_back(_wtof(start));
        else
            tokens.push_back(static_cast<Operator>(*start));
    }
    return tokens;
}
: (if → while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators and parentheses the lexer recognises. Each enumerator's value is
// the wide character that spells it, so a character can be cast to Operator.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the operator as a one-character string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Discriminates the two kinds of value a Token can hold.
enum class TokenType {
    Operator,
    Number
};

// Returns the enumerator's name; throws std::out_of_range for any other value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// Tagged union holding either an Operator or a double. Construction from both
// payload types and conversion back to them are implicit.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    // Which of the two payloads is stored.
    TokenType Type() const { return m_type; }

    // Returns the stored operator; throws std::logic_error for a number token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number; throws std::logic_error for an operator token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Renders a token: numbers via std::to_wstring, operators via ToString(Operator).
// Throws std::out_of_range if the token reports an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Walks a null-terminated wide string and collects the tokens it finds.
// Only a pointer into `expr` is stored, so the string passed to the
// constructor must stay alive while the tokenizer is in use.
class Tokenizer {
public:
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the whole expression. Characters that start neither a number
    // nor an operator (spaces, for instance) are skipped without a token.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    // Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    // True once the terminating L'\0' has been reached.
    bool EndOfExpression() const { return *m_current == L'\0'; }

    // True when the current character is a digit according to iswdigit().
    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Converts the number at the current position with wcstod() and moves
    // the cursor to the first character after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    // True when the current character is the value of an Operator enumerator.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    // Stores the current character as an Operator and steps past it.
    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits `expr` into number and operator tokens and returns them in order.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Empty placeholder token type.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

// Not implemented yet: unconditionally throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} β nil) , . (nil β constant) . (constant β constant+) ( , ). (constant β scalar) , . (statement β statements) (break, continue, return ). (unconditional β if) . (scalar β array) / . (array β container) . (statement β recursion) . (if β while) . (expression β function) . (variable β assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators and parentheses the lexer recognises. Each enumerator's value is
// the wide character that spells it, so a character can be cast to Operator.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the operator as a one-character string.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Discriminates the two kinds of value a Token can hold.
enum class TokenType {
    Operator,
    Number
};

// Returns the enumerator's name; throws std::out_of_range for any other value.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// Tagged union holding either an Operator or a double. Construction from both
// payload types and conversion back to them are implicit.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    // Which of the two payloads is stored.
    TokenType Type() const { return m_type; }

    // Returns the stored operator; throws std::logic_error for a number token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number; throws std::logic_error for an operator token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Renders a token: numbers via std::to_wstring, operators via ToString(Operator).
// Throws std::out_of_range if the token reports an unknown type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Walks a null-terminated wide string and collects the tokens it finds.
// Only a pointer into `expr` is stored, so the string passed to the
// constructor must stay alive while the tokenizer is in use.
class Tokenizer {
public:
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the whole expression. Characters that start neither a number
    // nor an operator (spaces, for instance) are skipped without a token.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    // Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    // True once the terminating L'\0' has been reached.
    bool EndOfExpression() const { return *m_current == L'\0'; }

    // True when the current character is a digit according to iswdigit().
    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Converts the number at the current position with wcstod() and moves
    // the cursor to the first character after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    // True when the current character is the value of an Operator enumerator.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    // Stores the current character as an Operator and steps past it.
    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits `expr` into number and operator tokens and returns them in order.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type: it carries no data yet and exists only so that
// the first test can compile.
struct Token {};

// Sequence of tokens produced by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

// First stub of the lexer entry point.
// Ignores `expr` and always throws std::exception, so any test that calls
// it fails until a real implementation is written.
inline Tokens Tokenize(std::string expr) { throw std::exception(); }

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// Operators known to the lexer at this step.
// The value of each enumerator is the wide character that spells it.
enum class Operator : wchar_t
{
    Plus = L'+',
};

// For now a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . …
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
… . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
// Splits `expr` into tokens, scanning it from left to right.
// A character for which iswdigit() is true starts a number, which is parsed
// with wcstod(); every other character becomes an operator token whose code
// is that character.
inline Tokens Tokenize(std::wstring expr)
{
    Tokens result;
    for(const wchar_t *cursor = expr.c_str(); *cursor; )
    {
        if(!iswdigit(*cursor))
        {
            result.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }

        // wcstod reports where the number ends, so scanning resumes there.
        wchar_t *numberEnd = nullptr;
        result.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;
    }
    return result;
}
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
// Every operator and parenthesis the lexer recognizes.
// The value of each enumerator is the wide character that spells it, so a
// character and its Operator convert to one another with a static_cast.
enum class Operator : wchar_t
{
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <wchar.h>   // wcstod
#include <wctype.h>  // iswdigit

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>

namespace Interpreter {

// Every operator and parenthesis the lexer recognizes.
// The value of each enumerator is the wide character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the one-character string that spells the operator.
inline std::wstring ToString(const Operator &op) {
    return std::wstring(1, static_cast<wchar_t>(op));
}

// Kind of value stored in a Token.
enum class TokenType {
    Operator,
    Number
};

// Returns the name of the token type.
// Throws std::out_of_range for a value that is not a declared enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// A lexer token that holds either an operator or a number.
// m_type records which member of the anonymous union is in use.
class Token {
public:
    // Both constructors are intentionally implicit: the tokenizer pushes raw
    // Operator and double values straight into a Tokens vector.
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    // Tells which kind of value this token holds.
    TokenType Type() const { return m_type; }

    // Returns the stored operator.
    // Throws std::logic_error when the token is not an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number.
    // Throws std::logic_error when the token is not a number token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same stored
    // value; tokens of different types are never equal.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Builds the text form of a token: std::to_wstring of the value for a
// number, the operator character for an operator.
// Throws std::out_of_range for an unknown token type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

// Scans a wide-character expression and collects its tokens.
// The constructor keeps the pointer returned by expr.c_str(), so the string
// passed in has to stay alive while the tokenizer is used.
class Tokenizer {
public:
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the expression up to its terminating null character.
    // Characters that start neither a number nor an operator (spaces, for
    // example) are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    // Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses a number with wcstod and continues right after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    // True when the current character is one of the Operator enumerators.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul,
                     Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) {
            return *m_current == static_cast<wchar_t>(o);
        });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits `expr` into number and operator tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type: it carries no data yet and exists only so that
// the first test can compile.
struct Token {};

// Sequence of tokens produced by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

// First stub of the lexer entry point.
// Ignores `expr` and always throws std::exception, so any test that calls
// it fails until a real implementation is written.
inline Tokens Tokenize(std::string expr) { throw std::exception(); }

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// Operators known to the lexer at this step.
// The value of each enumerator is the wide character that spells it.
enum class Operator : wchar_t
{
    Plus = L'+',
};

// For now a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
// Every operator and parenthesis the lexer recognizes.
// The value of each enumerator is the wide character that spells it, so a
// character and its Operator convert to one another with a static_cast.
enum class Operator : wchar_t
{
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <wchar.h>   // wcstod
#include <wctype.h>  // iswdigit

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>

namespace Interpreter {

// Every operator and parenthesis the lexer recognizes.
// The value of each enumerator is the wide character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the one-character string that spells the operator.
inline std::wstring ToString(const Operator &op) {
    return std::wstring(1, static_cast<wchar_t>(op));
}

// Kind of value stored in a Token.
enum class TokenType {
    Operator,
    Number
};

// Returns the name of the token type.
// Throws std::out_of_range for a value that is not a declared enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// A lexer token that holds either an operator or a number.
// m_type records which member of the anonymous union is in use.
class Token {
public:
    // Both constructors are intentionally implicit: the tokenizer pushes raw
    // Operator and double values straight into a Tokens vector.
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    // Tells which kind of value this token holds.
    TokenType Type() const { return m_type; }

    // Returns the stored operator.
    // Throws std::logic_error when the token is not an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number.
    // Throws std::logic_error when the token is not a number token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same stored
    // value; tokens of different types are never equal.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Builds the text form of a token: std::to_wstring of the value for a
// number, the operator character for an operator.
// Throws std::out_of_range for an unknown token type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

// Scans a wide-character expression and collects its tokens.
// The constructor keeps the pointer returned by expr.c_str(), so the string
// passed in has to stay alive while the tokenizer is used.
class Tokenizer {
public:
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the expression up to its terminating null character.
    // Characters that start neither a number nor an operator (spaces, for
    // example) are skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    // Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses a number with wcstod and continues right after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    // True when the current character is one of the Operator enumerators.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul,
                     Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) {
            return *m_current == static_cast<wchar_t>(o);
        });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits `expr` into number and operator tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type: it carries no data yet and exists only so that
// the first test can compile.
struct Token {};

// Sequence of tokens produced by the lexer.
typedef std::vector<Token> Tokens;

namespace Lexer {

// First stub of the lexer entry point.
// Ignores `expr` and always throws std::exception, so any test that calls
// it fails until a real implementation is written.
inline Tokens Tokenize(std::string expr) { throw std::exception(); }

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// Operators known to the lexer at this step.
// The value of each enumerator is the wide character that spells it.
enum class Operator : wchar_t
{
    Plus = L'+',
};

// For now a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
/// Operators known to the lexer. Each enumerator's value is the wide character
/// that spells the operator in an expression.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators known to the lexer. Each enumerator's value is the wide character
/// that spells the operator in an expression.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Renders an operator as the one-character string of its symbol.
inline std::wstring ToString(const Operator &op) {
    return std::wstring(1, static_cast<wchar_t>(op));
}

/// Discriminates what a Token holds.
enum class TokenType {
    Operator,
    Number
};

/// Name of a token type; throws std::out_of_range for a value outside the enum.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator: return L"Operator";
        case TokenType::Number: return L"Number";
        default: throw std::out_of_range("TokenType");
    }
}

/// A lexer token: either an operator or a number. The conversion operators
/// give access to the payload and throw std::logic_error when the token holds
/// the other kind.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same payload
    /// (numbers are compared with exact ==).
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type != right.m_type) {
            return false;
        }
        switch(left.m_type) {
            case TokenType::Operator: return left.m_operator == right.m_operator;
            case TokenType::Number: return left.m_number == right.m_number;
            default: throw std::out_of_range("TokenType");
        }
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Text form of a token: std::to_wstring of a number, or the operator symbol.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number: return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator: return ToString(static_cast<Operator>(token));
        default: throw std::out_of_range("TokenType");
    }
}

using Tokens = std::vector<Token>;

namespace Lexer {
namespace Detail {

/// Scans a wide-character expression once, up to the terminating L'\0',
/// collecting number and operator tokens. Characters that are neither a digit
/// nor a known operator are stepped over.
/// The scanner keeps a pointer into the string given to the constructor
/// (expr.c_str()), so that string has to stay alive while scanning.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the whole expression, appending tokens to the result.
    void Tokenize() {
        while(!EndOfExperssion()) {
            if(IsNumber()) {
                ScanNumber();
            } else if(IsOperator()) {
                ScanOperator();
            } else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExperssion() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    /// Parses a number with wcstod and resumes right after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // Defensive: if wcstod consumed nothing (possible if iswdigit
            // accepts a character wcstod cannot parse; implementation-
            // dependent), skip the character so the scan always advances
            // instead of looping forever.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    bool IsOperator() const {
        const auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    /// Emits the current character as an operator token and steps past it.
    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Lexer entry point: tokenizes `expr` and returns the collected tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type with no data yet.
struct Token {};

using Tokens = std::vector<Token>;

namespace Lexer {

/// Stub: unconditionally throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
/// Ignores `expr` and always yields an empty token list.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone "+" becomes exactly one Operator::Plus token.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
/// Operators known to the lexer; the enumerator's value is the operator's character.
enum class Operator : wchar_t {
    Plus = L'+',
};

/// At this stage a token is simply an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
/// Renders a token as a one-character string holding its character code.
inline std::wstring ToString(const Token &token) {
    return std::wstring(1, static_cast<wchar_t>(token));
}
, , . , (unconditional → if).
/// Empty input gives no tokens; otherwise only the first character is looked
/// at and it becomes a single operator token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    if(!expr.empty()) {
        tokens.push_back(static_cast<Operator>(expr.front()));
    }
    return tokens;
}
. . . …
.
// A single digit becomes one number token with that value.
TEST_METHOD(Should_tokenize_single_digit) {
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double reports the Number type.
TEST_METHOD(Should_get_type_for_number_token) {
    const Token numberToken(1.2);
    Assert::AreEqual(TokenType::Number, numberToken.Type());
}
. (constant → scalar) .
/// Token that remembers only its type: the constructor used decides whether
/// it is an Operator or a Number token; the argument value is not stored.
class Token {
    TokenType m_type;

public:
    Token(Operator) : m_type(TokenType::Operator) {}
    Token(double) : m_type(TokenType::Number) {}

    TokenType Type() const { return m_type; }
};
… . . . .
.
// An operator token converts back to the operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    const Token plusToken(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plusToken);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token converts back to the value it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) {
    const Token numberToken(1.23);
    Assert::AreEqual<double>(1.23, numberToken);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
// Digits with a decimal point form one number token.
TEST_METHOD(Should_tokenize_floating_point_number) {
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
/// Produces at most one token from the start of `expr`: nothing for an empty
/// string, a number when the first character is a digit, otherwise an
/// operator token for the first character.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *current = expr.c_str();
    if(*current == L'\0') {
        return {};
    }
    if(iswdigit(*current)) {
        // Standard wcstod instead of the Microsoft-specific _wtof.
        return { wcstod(current, nullptr) };
    }
    return { static_cast<Operator>(*current) };
}
. .
// An operator directly followed by a number yields two tokens, in order.
TEST_METHOD(Should_tokenize_plus_and_number) {
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
/// Produces at most one token from the start of `expr`, collected in a
/// result vector: a number when the first character is a digit, otherwise an
/// operator token for the first character.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    const wchar_t *current = expr.c_str();
    if(*current == L'\0') {
        return result;
    }
    if(iswdigit(*current)) {
        // Standard wcstod instead of the Microsoft-specific _wtof.
        result.push_back(wcstod(current, nullptr));
    } else {
        result.push_back(static_cast<Operator>(*current));
    }
    return result;
}
: (if → while). , .
/// Splits `expr` into tokens: a run that starts with a digit is parsed as a
/// number by wcstod; any other character becomes an Operator token carrying
/// that character's code.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    const wchar_t *cursor = expr.c_str();
    while(*cursor != L'\0') {
        if(!iswdigit(*cursor)) {
            // Non-digit: the character itself is cast to an Operator value.
            tokens.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        // wcstod reports where the number ends; scanning resumes there.
        wchar_t *numberEnd = nullptr;
        tokens.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
// Blanks around the operands and the operator must not yield tokens of their own.
TEST_METHOD(Should_skip_spaces) {
    const Tokens actual = Lexer::Tokenize(L" 1 + 12.34 ");
    AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, actual);
}
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
/// Lexer entry point: runs a Detail::Tokenizer over `expr` and returns a copy
/// of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    return scanner.Result();
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// Every arithmetic operator and both parentheses are recognised between numbers.
TEST_METHOD(Should_tokenize_complex_experssion) {
    const Tokens actual = Lexer::Tokenize(L"1+2*3/(4-5)");
    AssertRange::AreEqual({
        Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3),
        Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus),
        Token(5), Token(Operator::RParen)
    }, actual);
}
Operator
, .
/// Operators known to the lexer. Each enumerator's value is the wide character
/// that spells the operator in an expression.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators known to the lexer. Each enumerator's value is the wide character
/// that spells the operator in an expression.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Renders an operator as the one-character string of its symbol.
inline std::wstring ToString(const Operator &op) {
    return std::wstring(1, static_cast<wchar_t>(op));
}

/// Discriminates what a Token holds.
enum class TokenType {
    Operator,
    Number
};

/// Name of a token type; throws std::out_of_range for a value outside the enum.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator: return L"Operator";
        case TokenType::Number: return L"Number";
        default: throw std::out_of_range("TokenType");
    }
}

/// A lexer token: either an operator or a number. The conversion operators
/// give access to the payload and throw std::logic_error when the token holds
/// the other kind.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when they have the same type and the same payload
    /// (numbers are compared with exact ==).
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type != right.m_type) {
            return false;
        }
        switch(left.m_type) {
            case TokenType::Operator: return left.m_operator == right.m_operator;
            case TokenType::Number: return left.m_number == right.m_number;
            default: throw std::out_of_range("TokenType");
        }
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is meaningful.
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Text form of a token: std::to_wstring of a number, or the operator symbol.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number: return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator: return ToString(static_cast<Operator>(token));
        default: throw std::out_of_range("TokenType");
    }
}

using Tokens = std::vector<Token>;

namespace Lexer {
namespace Detail {

/// Scans a wide-character expression once, up to the terminating L'\0',
/// collecting number and operator tokens. Characters that are neither a digit
/// nor a known operator are stepped over.
/// The scanner keeps a pointer into the string given to the constructor
/// (expr.c_str()), so that string has to stay alive while scanning.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the whole expression, appending tokens to the result.
    void Tokenize() {
        while(!EndOfExperssion()) {
            if(IsNumber()) {
                ScanNumber();
            } else if(IsOperator()) {
                ScanOperator();
            } else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExperssion() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    /// Parses a number with wcstod and resumes right after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        const double value = wcstod(m_current, &end);
        if(end == m_current) {
            // Defensive: if wcstod consumed nothing (possible if iswdigit
            // accepts a character wcstod cannot parse; implementation-
            // dependent), skip the character so the scan always advances
            // instead of looping forever.
            MoveNext();
            return;
        }
        m_result.push_back(value);
        m_current = end;
    }

    bool IsOperator() const {
        const auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    /// Emits the current character as an operator token and steps past it.
    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

/// Lexer entry point: tokenizes `expr` and returns the collected tokens.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type with no data yet.
struct Token {};

using Tokens = std::vector<Token>;

namespace Lexer {

/// Stub: unconditionally throws std::exception.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
/// Ignores `expr` and always yields an empty token list.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone "+" becomes exactly one Operator::Plus token.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
/// Operators known to the lexer; the enumerator's value is the operator's character.
enum class Operator : wchar_t {
    Plus = L'+',
};

/// At this stage a token is simply an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
/// Renders a token as a one-character string holding its character code.
inline std::wstring ToString(const Token &token) {
    return std::wstring(1, static_cast<wchar_t>(token));
}
, , . , (unconditional → if).
/// Empty input gives no tokens; otherwise only the first character is looked
/// at and it becomes a single operator token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    if(!expr.empty()) {
        tokens.push_back(static_cast<Operator>(expr.front()));
    }
    return tokens;
}
. . . …
.
// A single digit becomes one number token with that value.
TEST_METHOD(Should_tokenize_single_digit) {
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
… . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
// A token built from a double reports the Number type.
TEST_METHOD(Should_get_type_for_number_token) {
    const Token numberToken(1.2);
    Assert::AreEqual(TokenType::Number, numberToken.Type());
}
. (constant → scalar) .
/// Token that remembers only its type: the constructor used decides whether
/// it is an Operator or a Number token; the argument value is not stored.
class Token {
    TokenType m_type;

public:
    Token(Operator) : m_type(TokenType::Operator) {}
    Token(double) : m_type(TokenType::Number) {}

    TokenType Type() const { return m_type; }
};
… . . . .
.
// An operator token converts back to the operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    const Token plusToken(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plusToken);
}
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
// A number token converts back to the value it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) {
    const Token numberToken(1.23);
    Assert::AreEqual<double>(1.23, numberToken);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
… . .
// Digits with a decimal point form one number token.
TEST_METHOD(Should_tokenize_floating_point_number) {
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
/// Produces at most one token from the start of `expr`: nothing for an empty
/// string, a number when the first character is a digit, otherwise an
/// operator token for the first character.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *current = expr.c_str();
    if(*current == L'\0') {
        return {};
    }
    if(iswdigit(*current)) {
        // Standard wcstod instead of the Microsoft-specific _wtof.
        return { wcstod(current, nullptr) };
    }
    return { static_cast<Operator>(*current) };
}
. .
// An operator directly followed by a number yields two tokens, in order.
TEST_METHOD(Should_tokenize_plus_and_number) {
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
/// Produces at most one token from the start of `expr`, collected in a
/// result vector: a number when the first character is a digit, otherwise an
/// operator token for the first character.
inline Tokens Tokenize(std::wstring expr) {
    Tokens result;
    const wchar_t *current = expr.c_str();
    if(*current == L'\0') {
        return result;
    }
    if(iswdigit(*current)) {
        // Standard wcstod instead of the Microsoft-specific _wtof.
        result.push_back(wcstod(current, nullptr));
    } else {
        result.push_back(static_cast<Operator>(*current));
    }
    return result;
}
: (if → while). , .
/// Splits `expr` into tokens: a run that starts with a digit is parsed as a
/// number by wcstod; any other character becomes an Operator token carrying
/// that character's code.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    const wchar_t *cursor = expr.c_str();
    while(*cursor != L'\0') {
        if(!iswdigit(*cursor)) {
            // Non-digit: the character itself is cast to an Operator value.
            tokens.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        // wcstod reports where the number ends; scanning resumes there.
        wchar_t *numberEnd = nullptr;
        tokens.push_back(wcstod(cursor, &numberEnd));
        cursor = numberEnd;
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
// Blanks around the operands and the operator must not yield tokens of their own.
TEST_METHOD(Should_skip_spaces) {
    const Tokens actual = Lexer::Tokenize(L" 1 + 12.34 ");
    AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, actual);
}
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
/// Lexer entry point: runs a Detail::Tokenizer over `expr` and returns a copy
/// of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    return scanner.Result();
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// Every arithmetic operator and both parentheses are recognised between numbers.
TEST_METHOD(Should_tokenize_complex_experssion) {
    const Tokens actual = Lexer::Tokenize(L"1+2*3/(4-5)");
    AssertRange::AreEqual({
        Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3),
        Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus),
        Token(5), Token(Operator::RParen)
    }, actual);
}
Operator
, .
/// Operators known to the lexer. Each enumerator's value is the wide character
/// that spells the operator in an expression.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')',
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators the lexer recognises. Each enumerator's value is the wide
/// character that spells the operator.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character string that spells the operator.
inline std::wstring ToString(const Operator &op) {
    return std::wstring(1, static_cast<wchar_t>(op));
}

/// Kind of value stored in a Token.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator name.
/// @throws std::out_of_range for a value that is not a declared enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

/// Tagged value produced by the lexer: either an operator or a number.
/// The constructors and conversion operators are intentionally implicit.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// @throws std::logic_error when the token is not an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error when the token is not a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when both the type and the stored value match.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type != right.m_type)
            return false;
        switch(left.m_type) {
        case Interpreter::TokenType::Operator:
            return left.m_operator == right.m_operator;
        case Interpreter::TokenType::Number:
            return left.m_number == right.m_number;
        default:
            throw std::out_of_range("TokenType");
        }
    }

private:
    TokenType m_type; // selects the active union member below
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Formats a token as its number (std::to_wstring) or operator character.
/// @throws std::out_of_range when the token type is not a declared enumerator.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

/// Scans an expression one character at a time and collects its tokens.
/// Characters that are neither part of a number nor an operator are skipped.
class Tokenizer {
public:
    /// @param expr Expression to scan. Only a pointer into its buffer is
    ///             stored, so expr must outlive the call to Tokenize().
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the whole expression, appending tokens to the result.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            } else if(IsOperator()) {
                ScanOperator();
            } else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    void ScanNumber() {
        wchar_t *end = nullptr;
        const double number = wcstod(m_current, &end);
        if(end == m_current) {
            // wcstod converted nothing; skip the character so that
            // Tokenize() cannot spin on it forever.
            MoveNext();
            return;
        }
        m_result.push_back(Token(number));
        m_current = end;
    }

    bool IsOperator() const {
        const auto all = { Operator::Plus, Operator::Minus, Operator::Mul,
                           Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(),
            [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current; // next unread character of the expression
    Tokens m_result;
};

} // namespace Detail

/// Splits expr into number and operator tokens, in order of appearance.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

/// Stub that exists only so the first test compiles. It always throws
/// std::exception and never returns a token list.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
/// Splits an expression into number and operator tokens.
///
/// A run that starts with a digit is parsed as one number by wcstod. Every
/// other character is stored as an Operator token whose value is the
/// character itself.
///
/// @param expr Expression text; an empty string yields an empty list.
/// @return Tokens in the order they occur in expr.
inline Tokens Tokenize(const std::wstring &expr) {
    Tokens result;
    const wchar_t *current = expr.c_str();
    while(*current) {
        if(iswdigit(*current)) {
            wchar_t *end = nullptr;
            const double number = wcstod(current, &end);
            // wcstod leaves `end` at the start when it converts nothing.
            // Only accept the number if the cursor really moved; otherwise
            // fall through so the loop is guaranteed to advance.
            if(end != current) {
                result.push_back(number);
                current = end;
                continue;
            }
        }
        result.push_back(static_cast<Operator>(*current));
        ++current;
    }
    return result;
}
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
/// Tokenizes expr by running a Detail::Tokenizer over it and returning a
/// copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    const Tokens &collected = scanner.Result();
    return collected;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
/// Operators the lexer recognises. Each enumerator's value is the wide
/// character that spells the operator.
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators the lexer recognises. Each enumerator's value is the wide
/// character that spells the operator.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character string that spells the operator.
inline std::wstring ToString(const Operator &op) {
    return std::wstring(1, static_cast<wchar_t>(op));
}

/// Kind of value stored in a Token.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator name.
/// @throws std::out_of_range for a value that is not a declared enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

/// Tagged value produced by the lexer: either an operator or a number.
/// The constructors and conversion operators are intentionally implicit.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// @throws std::logic_error when the token is not an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error when the token is not a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when both the type and the stored value match.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type != right.m_type)
            return false;
        switch(left.m_type) {
        case Interpreter::TokenType::Operator:
            return left.m_operator == right.m_operator;
        case Interpreter::TokenType::Number:
            return left.m_number == right.m_number;
        default:
            throw std::out_of_range("TokenType");
        }
    }

private:
    TokenType m_type; // selects the active union member below
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Formats a token as its number (std::to_wstring) or operator character.
/// @throws std::out_of_range when the token type is not a declared enumerator.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

/// Scans an expression one character at a time and collects its tokens.
/// Characters that are neither part of a number nor an operator are skipped.
class Tokenizer {
public:
    /// @param expr Expression to scan. Only a pointer into its buffer is
    ///             stored, so expr must outlive the call to Tokenize().
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the whole expression, appending tokens to the result.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            } else if(IsOperator()) {
                ScanOperator();
            } else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    void ScanNumber() {
        wchar_t *end = nullptr;
        const double number = wcstod(m_current, &end);
        if(end == m_current) {
            // wcstod converted nothing; skip the character so that
            // Tokenize() cannot spin on it forever.
            MoveNext();
            return;
        }
        m_result.push_back(Token(number));
        m_current = end;
    }

    bool IsOperator() const {
        const auto all = { Operator::Plus, Operator::Minus, Operator::Mul,
                           Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(),
            [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current; // next unread character of the expression
    Tokens m_result;
};

} // namespace Detail

/// Splits expr into number and operator tokens, in order of appearance.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

/// Placeholder token type; it carries no data yet.
struct Token {};

typedef std::vector<Token> Tokens;

namespace Lexer {

/// Stub that exists only so the first test compiles. It always throws
/// std::exception and never returns a token list.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional → if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if → while). , .
/// Splits an expression into number and operator tokens.
///
/// A run that starts with a digit is parsed as one number by wcstod. Every
/// other character is stored as an Operator token whose value is the
/// character itself.
///
/// @param expr Expression text; an empty string yields an empty list.
/// @return Tokens in the order they occur in expr.
inline Tokens Tokenize(const std::wstring &expr) {
    Tokens result;
    const wchar_t *current = expr.c_str();
    while(*current) {
        if(iswdigit(*current)) {
            wchar_t *end = nullptr;
            const double number = wcstod(current, &end);
            // wcstod leaves `end` at the start when it converts nothing.
            // Only accept the number if the cursor really moved; otherwise
            // fall through so the loop is guaranteed to advance.
            if(end != current) {
                result.push_back(number);
                current = end;
                continue;
            }
        }
        result.push_back(static_cast<Operator>(*current));
        ++current;
    }
    return result;
}
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional → if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
/// Tokenizes expr by running a Detail::Tokenizer over it and returning a
/// copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer scanner(expr);
    scanner.Tokenize();
    const Tokens &collected = scanner.Result();
    return collected;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
/// Operators the lexer recognises. Each enumerator's value is the wide
/// character that spells the operator.
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

/// Operators the lexer recognises. Each enumerator's value is the wide
/// character that spells the operator.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

/// Returns the one-character string that spells the operator.
inline std::wstring ToString(const Operator &op) {
    return std::wstring(1, static_cast<wchar_t>(op));
}

/// Kind of value stored in a Token.
enum class TokenType {
    Operator,
    Number
};

/// Returns the enumerator name.
/// @throws std::out_of_range for a value that is not a declared enumerator.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
    case TokenType::Operator:
        return L"Operator";
    case TokenType::Number:
        return L"Number";
    default:
        throw std::out_of_range("TokenType");
    }
}

/// Tagged value produced by the lexer: either an operator or a number.
/// The constructors and conversion operators are intentionally implicit.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    /// @throws std::logic_error when the token is not an operator.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    /// @throws std::logic_error when the token is not a number.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    /// Tokens are equal when both the type and the stored value match.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type != right.m_type)
            return false;
        switch(left.m_type) {
        case Interpreter::TokenType::Operator:
            return left.m_operator == right.m_operator;
        case Interpreter::TokenType::Number:
            return left.m_number == right.m_number;
        default:
            throw std::out_of_range("TokenType");
        }
    }

private:
    TokenType m_type; // selects the active union member below
    union {
        Operator m_operator;
        double m_number;
    };
};

/// Formats a token as its number (std::to_wstring) or operator character.
/// @throws std::out_of_range when the token type is not a declared enumerator.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
    case TokenType::Number:
        return std::to_wstring(static_cast<double>(token));
    case TokenType::Operator:
        return ToString(static_cast<Operator>(token));
    default:
        throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

/// Scans an expression one character at a time and collects its tokens.
/// Characters that are neither part of a number nor an operator are skipped.
class Tokenizer {
public:
    /// @param expr Expression to scan. Only a pointer into its buffer is
    ///             stored, so expr must outlive the call to Tokenize().
    explicit Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    /// Consumes the whole expression, appending tokens to the result.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            } else if(IsOperator()) {
                ScanOperator();
            } else {
                MoveNext();
            }
        }
    }

    /// Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    void ScanNumber() {
        wchar_t *end = nullptr;
        const double number = wcstod(m_current, &end);
        if(end == m_current) {
            // wcstod converted nothing; skip the character so that
            // Tokenize() cannot spin on it forever.
            MoveNext();
            return;
        }
        m_result.push_back(Token(number));
        m_current = end;
    }

    bool IsOperator() const {
        const auto all = { Operator::Plus, Operator::Minus, Operator::Mul,
                           Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(),
            [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current; // next unread character of the expression
    Tokens m_result;
};

} // namespace Detail

/// Splits expr into number and operator tokens, in order of appearance.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data yet.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

// First stub of the lexer: it exists only so the first test can compile
// and fail. Always throws std::exception; `expr` is not inspected.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} → nil) , . (nil → constant) . (constant → constant+) ( , ). (constant → scalar) , . (statement → statements) (break, continue, return ). (unconditional → if) . (scalar → array) / . (array → container) . (statement → recursion) . (if → while) . (expression → function) . (variable → assignment) .
.
// Simplest implementation that satisfies the empty-expression test:
// every input produces an empty token list.
inline Tokens Tokenize(std::string expr) {
    return Tokens();
}
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
// A lone "+" must produce exactly one token: the Plus operator.
TEST_METHOD(Should_tokenize_single_plus_operator) {
    const Tokens actual = Lexer::Tokenize(L"+");
    AssertRange::AreEqual({ Operator::Plus }, actual);
}
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
// Operators known to the lexer. The underlying value of each enumerator is
// the wide character that spells it.
enum class Operator : wchar_t {
    Plus = L'+'
};

// At this stage a token is nothing more than an operator.
using Token = Operator;
, , Assert
, ToString
, .
// std::wstring ToString(const Token &)
// Renders a token as the one-character string formed from its underlying
// wchar_t value.
inline std::wstring ToString(const Token &token) {
    const wchar_t symbol = static_cast<wchar_t>(token);
    return std::wstring(1, symbol);
}
, , . , (unconditional → if).
// Returns no tokens for an empty expression; otherwise returns a single
// operator token built from the first character.
inline Tokens Tokenize(std::wstring expr) {
    return expr.empty()
        ? Tokens{}
        : Tokens{ static_cast<Operator>(expr[0]) };
}
. . . β¦
.
// A single digit must be tokenized as one number token with that value.
TEST_METHOD(Should_tokenize_single_digit) {
    const Tokens actual = Lexer::Tokenize(L"1");
    AssertRange::AreEqual({ 1.0 }, actual);
}
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
// Discriminator for the kinds of token the lexer can produce.
enum class TokenType { Operator, Number };

// First cut of Token: it can only be built from an operator, so Type()
// returns a constant.
class Token {
public:
    Token(Operator) {}
    TokenType Type() const { return TokenType::Operator; }
};

// … (elision marker from the listing; presumably code omitted here)

// Tests for the Token class itself.
TEST_CLASS(TokenTests) {
public:
    TEST_METHOD(Should_get_type_for_operator_token) {
        Token opToken(Operator::Plus);
        Assert::AreEqual(TokenType::Operator, opToken.Type());
    }
};
ToString
TokenType
, , . .
// A token constructed from a double must report TokenType::Number.
TEST_METHOD(Should_get_type_for_number_token) {
    const Token numberToken(1.2);
    const TokenType actualType = numberToken.Type();
    Assert::AreEqual(TokenType::Number, actualType);
}
. (constant → scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
// An operator token must convert back to the operator it was built from.
TEST_METHOD(Should_get_operator_code_from_operator_token) {
    const Token plusToken(Operator::Plus);
    Assert::AreEqual<Operator>(Operator::Plus, plusToken);
}
.
// Partial listing of Token: only the members added in this step are shown.
class Token {
public:
    // Stores the operator and tags the token as TokenType::Operator.
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    // Converts the token to the stored operator (no type check at this stage).
    operator Operator() const { return m_operator; }
    // … (elision marker from the listing; presumably the remaining members)
    Operator m_operator;
};
.
// A number token must convert back to the double it was built from.
TEST_METHOD(Should_get_number_value_from_number_token) {
    const Token numberToken(1.23);
    Assert::AreEqual<double>(1.23, numberToken);
}
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
// A multi-digit number with a fractional part must be read as one token.
TEST_METHOD(Should_tokenize_floating_point_number) {
    const Tokens actual = Lexer::Tokenize(L"12.34");
    AssertRange::AreEqual({ 12.34 }, actual);
}
, C
, isdigit
, , atof
, , wchar_t
. (expression → function). .
// Produces at most one token from the start of `expr`: nothing if the
// text starts with the terminator, a number (parsed by _wtof) if it
// starts with a digit, otherwise an operator from the first character.
inline Tokens Tokenize(std::wstring expr) {
    const wchar_t *const text = expr.c_str();
    const wchar_t first = *text;

    if(first == L'\0') {
        return Tokens{};
    }
    if(iswdigit(first) != 0) {
        return Tokens{ _wtof(text) };
    }
    return Tokens{ static_cast<Operator>(first) };
}
. .
// An operator immediately followed by a number must yield two tokens.
TEST_METHOD(Should_tokenize_plus_and_number) {
    const Tokens actual = Lexer::Tokenize(L"+12.34");
    AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, actual);
}
, . , . . result
, .
// Same single-token behaviour as before, but the token is now accumulated
// in a local list in preparation for scanning more than one token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    const wchar_t *const text = expr.c_str();

    if(*text != L'\0') {
        if(iswdigit(*text)) {
            tokens.push_back(_wtof(text));
        }
        else {
            tokens.push_back(static_cast<Operator>(*text));
        }
    }
    return tokens;
}
: (if → while). , .
// Scans the whole expression. A run starting with a digit is parsed by
// wcstod, which also reports where the number ends; every other character
// becomes an operator token.
inline Tokens Tokenize(std::wstring expr) {
    Tokens tokens;
    for(const wchar_t *cursor = expr.c_str(); *cursor != L'\0'; ) {
        if(!iswdigit(*cursor)) {
            tokens.push_back(static_cast<Operator>(*cursor));
            ++cursor;
            continue;
        }
        wchar_t *afterNumber = nullptr;
        const double value = wcstod(cursor, &afterNumber);
        tokens.push_back(value);
        cursor = afterNumber;   // resume right after the parsed number
    }
    return tokens;
}
wcstod
, _wtof
, . , . , .
. .
.
// Spaces around and between tokens must not produce tokens of their own.
TEST_METHOD(Should_skip_spaces) {
    const Tokens actual = Lexer::Tokenize(L" 1 + 12.34 ");
    AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, actual);
}
(unconditional → if) , .
// Scan loop: numbers and '+' become tokens; any other character is skipped.
while(*current != L'\0') {
    const wchar_t symbol = *current;
    if(iswdigit(symbol)) {
        wchar_t *numberEnd = nullptr;
        const double number = wcstod(current, &numberEnd);
        result.push_back(number);
        current = numberEnd;
        continue;
    }
    if(symbol == static_cast<wchar_t>(Operator::Plus)) {
        result.push_back(static_cast<Operator>(symbol));
    }
    ++current;   // operators and ignored characters both advance by one
}
. . Detail
. Tokenize
.
// Public entry point of the lexer: runs a Detail::Tokenizer over `expr`
// and returns a copy of the tokens it collected.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer lexer(expr);
    lexer.Tokenize();
    Tokens tokens = lexer.Result();
    return tokens;
}
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
// All six operators and several numbers in one expression.
TEST_METHOD(Should_tokenize_complex_experssion) {
    const Tokens actual = Lexer::Tokenize(L"1+2*3/(4-5)");
    AssertRange::AreEqual({
        Token(1), Token(Operator::Plus),
        Token(2), Token(Operator::Mul),
        Token(3), Token(Operator::Div),
        Token(Operator::LParen),
        Token(4), Token(Operator::Minus), Token(5),
        Token(Operator::RParen)
    }, actual);
}
Operator
, .
// Operators known to the lexer. The underlying value of each enumerator is
// the wide character that spells it, so a character can be cast directly.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')'
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
// Lexer for arithmetic expressions: turns a wide string into a list of
// number and operator tokens.
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators known to the lexer. The underlying value of each enumerator is
// the wide character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// One-character string holding the operator's symbol.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Discriminator for the two kinds of token.
enum class TokenType {
    Operator,
    Number
};

// Name of the token type. Throws std::out_of_range for a value outside the enum.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// Tagged value: either an operator or a number. m_type records which
// member of the anonymous union is active. The constructors are
// intentionally implicit so operators and doubles can be pushed straight
// into a Tokens list.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Returns the stored operator.
    // Throws std::logic_error if this is not an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number.
    // Throws std::logic_error if this is not a number token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    // Numbers are compared exactly.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token. Throws std::out_of_range for an unknown token type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Stateful scanner behind Lexer::Tokenize. It keeps a pointer into the
// buffer of the string passed to the constructor, so that string must
// outlive the Tokenizer.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Walks the expression to its terminator, appending number and operator
    // tokens to the result and skipping every other character.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    // Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses a number with wcstod and continues right after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    // True when the current character is one of the operator characters.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    // Stores the current character as an operator and advances past it.
    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits `expr` into tokens. Characters that are neither part of a number
// nor an operator (spaces, for example) are skipped.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
// InterpreterTests.cpp
// Unit tests for the lexer and the Token class, written against the
// Visual Studio CppUnitTest framework.
#include "stdafx.h"
#include "CppUnitTest.h"
#include "Interpreter.h"

namespace InterpreterTests {
using namespace Microsoft::VisualStudio::CppUnitTestFramework;
using namespace Interpreter;
using namespace std;

namespace AssertRange {
// Asserts that `actual` holds the same elements, in the same order, as `expect`.
// The sizes are compared first; then each pair is compared as type T, and the
// failure message names the zero-based position of the mismatch.
template<class T, class ActualRange>
static void AreEqual(initializer_list<T> expect, const ActualRange &actual) {
    auto actualIter = begin(actual);
    auto expectIter = begin(expect);
    Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs.");
    for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) {
        auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter));
        Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str());
    }
}
} // namespace AssertRange

// Behaviour of Lexer::Tokenize.
TEST_CLASS(LexerTests) {
public:
    TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) {
        Tokens tokens = Lexer::Tokenize(L"");
        Assert::IsTrue(tokens.empty());
    }
    TEST_METHOD(Should_tokenize_single_plus_operator) {
        Tokens tokens = Lexer::Tokenize(L"+");
        AssertRange::AreEqual({ Operator::Plus }, tokens);
    }
    TEST_METHOD(Should_tokenize_single_digit) {
        Tokens tokens = Lexer::Tokenize(L"1");
        AssertRange::AreEqual({ 1.0 }, tokens);
    }
    TEST_METHOD(Should_tokenize_floating_point_number) {
        Tokens tokens = Lexer::Tokenize(L"12.34");
        AssertRange::AreEqual({ 12.34 }, tokens);
    }
    TEST_METHOD(Should_tokenize_plus_and_number) {
        Tokens tokens = Lexer::Tokenize(L"+12.34");
        AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens);
    }
    TEST_METHOD(Should_skip_spaces) {
        Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 ");
        AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens);
    }
    TEST_METHOD(Should_tokenize_complex_experssion) {
        Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)");
        AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens);
    }
};

// Behaviour of Token: type tag and conversions back to the stored value.
TEST_CLASS(TokenTests) {
public:
    TEST_METHOD(Should_get_type_for_operator_token) {
        Token opToken(Operator::Plus);
        Assert::AreEqual(TokenType::Operator, opToken.Type());
    }
    TEST_METHOD(Should_get_type_for_number_token) {
        Token numToken(1.2);
        Assert::AreEqual(TokenType::Number, numToken.Type());
    }
    TEST_METHOD(Should_get_operator_code_from_operator_token) {
        Token token(Operator::Plus);
        Assert::AreEqual<Operator>(Operator::Plus, token);
    }
    TEST_METHOD(Should_get_number_value_from_number_token) {
        Token token(1.23);
        Assert::AreEqual<double>(1.23, token);
    }
};
}
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data yet.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

// First stub of the lexer: it exists only so the first test can compile
// and fail. Always throws std::exception; `expr` is not inspected.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} β nil) , . (nil β constant) . (constant β constant+) ( , ). (constant β scalar) , . (statement β statements) (break, continue, return ). (unconditional β if) . (scalar β array) / . (array β container) . (statement β recursion) . (if β while) . (expression β function) . (variable β assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
enum class Operator : wchar_t { Plus = L'+', Minus = L'-', Mul = L'*', Div = L'/', LParen = L'(', RParen = L')', };
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
// Lexer for arithmetic expressions: turns a wide string into a list of
// number and operator tokens.
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators known to the lexer. The underlying value of each enumerator is
// the wide character that spells it.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// One-character string holding the operator's symbol.
inline std::wstring ToString(const Operator &op) {
    return{ static_cast<wchar_t>(op) };
}

// Discriminator for the two kinds of token.
enum class TokenType {
    Operator,
    Number
};

// Name of the token type. Throws std::out_of_range for a value outside the enum.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// Tagged value: either an operator or a number. m_type records which
// member of the anonymous union is active. The constructors are
// intentionally implicit so operators and doubles can be pushed straight
// into a Tokens list.
class Token {
public:
    Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) :m_type(TokenType::Number), m_number(num) {}

    TokenType Type() const { return m_type; }

    // Returns the stored operator.
    // Throws std::logic_error if this is not an operator token.
    operator Operator() const {
        if(m_type != TokenType::Operator)
            throw std::logic_error("Should be operator token.");
        return m_operator;
    }

    // Returns the stored number.
    // Throws std::logic_error if this is not a number token.
    operator double() const {
        if(m_type != TokenType::Number)
            throw std::logic_error("Should be number token.");
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    // Numbers are compared exactly.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type == right.m_type) {
            switch(left.m_type) {
                case Interpreter::TokenType::Operator:
                    return left.m_operator == right.m_operator;
                case Interpreter::TokenType::Number:
                    return left.m_number == right.m_number;
                default:
                    throw std::out_of_range("TokenType");
            }
        }
        return false;
    }

private:
    TokenType m_type;
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token. Throws std::out_of_range for an unknown token type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {
namespace Detail {

// Stateful scanner behind Lexer::Tokenize. It keeps a pointer into the
// buffer of the string passed to the constructor, so that string must
// outlive the Tokenizer.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Walks the expression to its terminator, appending number and operator
    // tokens to the result and skipping every other character.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    // Tokens collected so far.
    const Tokens &Result() const { return m_result; }

private:
    bool EndOfExpression() const { return *m_current == L'\0'; }

    bool IsNumber() const { return iswdigit(*m_current) != 0; }

    // Parses a number with wcstod and continues right after it.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    // True when the current character is one of the operator characters.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) { return *m_current == static_cast<wchar_t>(o); });
    }

    // Stores the current character as an operator and advances past it.
    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    void MoveNext() { ++m_current; }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits `expr` into tokens. Characters that are neither part of a number
// nor an operator (spaces, for example) are skipped.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer
} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
std::vector, Tokens
. .
#pragma once

#include <exception>
#include <string>
#include <vector>

namespace Interpreter {

// Placeholder token type; it carries no data yet.
struct Token {};
typedef std::vector<Token> Tokens;

namespace Lexer {

// First stub of the lexer: it exists only so the first test can compile
// and fail. Always throws std::exception; `expr` is not inspected.
inline Tokens Tokenize(std::string expr) {
    throw std::exception();
}

} // namespace Lexer
} // namespace Interpreter
, , , . , , The Transformation Priority Premise (TPP) . , , , , . . , , , . , , ( ) , , . , , . , TPP .
:
({} β nil) , . (nil β constant) . (constant β constant+) ( , ). (constant β scalar) , . (statement β statements) (break, continue, return ). (unconditional β if) . (scalar β array) / . (array β container) . (statement β recursion) . (if β while) . (expression β function) . (variable β assignment) .
.
inline Tokens Tokenize(std::string expr) { return{}; }
, . .
. . . . . .
, std::string
std::wstring
. , Unicode. .
TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); }
AssertRange
- , AreEqual
, , , .
AssertRange namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange
Operator
. wchar_t
, , .
enum class Operator : wchar_t { Plus = L'+', }; typedef Operator Token;
, , Assert
, ToString
, .
std::wstring ToString(const Token &) inline std::wstring ToString(const Token &token) { return{ static_cast<wchar_t>(token) }; }
, , . , (unconditional β if).
inline Tokens Tokenize(std::wstring expr) { if(expr.empty()) { return{}; } return{ static_cast<Operator>(expr[0]) }; }
. . . β¦
.
TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); }
. , , - . , :
, Token
. dynamic_cast
, . , , . std::function
. . Boost.Any, - . .
, . - . , , .
β¦ . . . .
.
enum class TokenType { Operator, Number }; class Token { public: Token(Operator) {} TokenType Type() const { return TokenType::Operator; } }; β¦ TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } };
ToString
TokenType
, , . .
TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); }
. (constant β scalar) .
class Token { public: Token(Operator) :m_type(TokenType::Operator) {} Token(double) :m_type(TokenType::Number) {} TokenType Type() const { return m_type; } private: TokenType m_type; };
β¦ . . . .
.
TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); }
.
class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} operator Operator() const { return m_operator; } β¦ Operator m_operator; };
.
TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); }
, , union
. .
Token class Token { public: Token(Operator op) :m_type(TokenType::Operator), m_operator(op) {} Token(double num) :m_type(TokenType::Number), m_number(num) {} TokenType Type() const { return m_type; } operator Operator() const { if(m_type != TokenType::Operator) throw std::logic_error("Should be operator token."); return m_operator; } operator double() const { if(m_type != TokenType::Number) throw std::logic_error("Should be number token."); return m_number; } private: TokenType m_type; union { Operator m_operator; double m_number; }; }; inline std::wstring ToString(const Token &token) { switch(token.Type()) { case TokenType::Number: return std::to_wstring(static_cast<double>(token)); case TokenType::Operator: return ToString(static_cast<Operator>(token)); default: return "Unknown token."; } }
, , , . . .
. . . .
:
if(expr[0] >= '0' && expr[0] <= '9') { return{ (double) expr[0] - '0' }; } return{ static_cast<Operator>(expr[0]) };
, .
β¦ . .
TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); }
, C
, isdigit
, , atof
, , wchar_t
. (expression β function). .
inline Tokens Tokenize(std::wstring expr) { const wchar_t *current = expr.c_str(); if(!*current) return{}; if(iswdigit(*current)) return{ _wtof(current) }; return{ static_cast<Operator>(*current) }; }
. .
TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); }
, . , . . result
, .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); if(!*current) return result; if(iswdigit(*current)) { result.push_back(_wtof(current)); } else { result.push_back(static_cast<Operator>(*current)); } return result; }
: (if β while). , .
inline Tokens Tokenize(std::wstring expr) { Tokens result; const wchar_t *current = expr.c_str(); while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else { result.push_back(static_cast<Operator>(*current)); ++current; } } return result; }
wcstod
, _wtof
, . , . , .
. .
.
TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); }
(unconditional β if) , .
while(*current) { if(iswdigit(*current)) { wchar_t *end = nullptr; result.push_back(wcstod(current, &end)); current = end; } else if(*current == static_cast<wchar_t>(Operator::Plus)) { result.push_back(static_cast<Operator>(*current)); ++current; } else { ++current; } }
. . Detail
. Tokenize
.
inline Tokens Tokenize(const std::wstring &expr) { Detail::Tokenizer tokenizer(expr); tokenizer.Tokenize(); return tokenizer.Result(); }
Detail::Tokenizer namespace Detail { class Tokenizer { public: Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {} void Tokenize() { while(!EndOfExperssion()) { if(IsNumber()) { ScanNumber(); } else if(IsOperator()) { ScanOperator(); } else { MoveNext(); } } } const Tokens &Result() const { return m_result; } private: bool EndOfExperssion() const { return *m_current == L'\0'; } bool IsNumber() const { return iswdigit(*m_current) != 0; } void ScanNumber() { wchar_t *end = nullptr; m_result.push_back(wcstod(m_current, &end)); m_current = end; } bool IsOperator() const { return *m_current == static_cast<wchar_t>(Operator::Plus); } void ScanOperator() { m_result.push_back(static_cast<Operator>(*m_current)); MoveNext(); } void MoveNext() { ++m_current; } const wchar_t *m_current; Tokens m_result; }; } // namespace Detail
, . , , . .
TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); }
Operator
, .
// Operators known to the lexer. The value of each enumerator is the wide
// character that spells the operator, so a character can be converted to an
// Operator (and back) with a static_cast.
enum class Operator : wchar_t {
    Plus   = L'+',
    Minus  = L'-',
    Mul    = L'*',
    Div    = L'/',
    LParen = L'(',
    RParen = L')'
};
. IsOperator
Tokenizer
.
bool IsOperator() const { auto all = { Operator::Plus, Operator::Minus, Operator::Mul, Operator::Div, Operator::LParen, Operator::RParen }; return std::any_of(all.begin(), all.end(), [this](Operator o) {return *m_current == static_cast<wchar_t>(o); }); }
. .
// Interpreter.h
//
// Lexer for simple arithmetic expressions: turns a wide-character string into
// a sequence of number and operator tokens.
#pragma once

#include <algorithm>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <vector>
#include <wchar.h>
#include <wctype.h>

namespace Interpreter {

// Operators known to the lexer. The value of each enumerator is the wide
// character that spells the operator in an expression.
enum class Operator : wchar_t {
    Plus = L'+',
    Minus = L'-',
    Mul = L'*',
    Div = L'/',
    LParen = L'(',
    RParen = L')',
};

// Returns the one-character string that spells the operator.
inline std::wstring ToString(const Operator &op) {
    return std::wstring(1, static_cast<wchar_t>(op));
}

// Discriminates what a Token holds.
enum class TokenType {
    Operator,
    Number
};

// Returns the enumerator name; throws std::out_of_range for a value that is
// not one of the enumerators.
inline std::wstring ToString(const TokenType &type) {
    switch(type) {
        case TokenType::Operator:
            return L"Operator";
        case TokenType::Number:
            return L"Number";
        default:
            throw std::out_of_range("TokenType");
    }
}

// A lexical token: either an operator or a floating-point number.
// Both constructors are intentionally implicit so that an Operator or a
// double can be pushed directly into a Tokens vector.
class Token {
public:
    Token(Operator op) : m_type(TokenType::Operator), m_operator(op) {}
    Token(double num) : m_type(TokenType::Number), m_number(num) {}

    // Kind of value stored in the token.
    TokenType Type() const {
        return m_type;
    }

    // Returns the stored operator; throws std::logic_error for a number token.
    operator Operator() const {
        if(m_type != TokenType::Operator) {
            throw std::logic_error("Should be operator token.");
        }
        return m_operator;
    }

    // Returns the stored number; throws std::logic_error for an operator token.
    operator double() const {
        if(m_type != TokenType::Number) {
            throw std::logic_error("Should be number token.");
        }
        return m_number;
    }

    // Tokens are equal when they have the same type and the same payload.
    // Numbers are compared exactly with ==.
    friend inline bool operator==(const Token &left, const Token &right) {
        if(left.m_type != right.m_type) {
            return false;
        }
        switch(left.m_type) {
            case Interpreter::TokenType::Operator:
                return left.m_operator == right.m_operator;
            case Interpreter::TokenType::Number:
                return left.m_number == right.m_number;
            default:
                throw std::out_of_range("TokenType");
        }
    }

private:
    TokenType m_type;
    // Only the member selected by m_type is ever initialized and read.
    union {
        Operator m_operator;
        double m_number;
    };
};

// Text form of a token: std::to_wstring of the number, or the operator
// character. Throws std::out_of_range for an unknown token type.
inline std::wstring ToString(const Token &token) {
    switch(token.Type()) {
        case TokenType::Number:
            return std::to_wstring(static_cast<double>(token));
        case TokenType::Operator:
            return ToString(static_cast<Operator>(token));
        default:
            throw std::out_of_range("TokenType");
    }
}

typedef std::vector<Token> Tokens;

namespace Lexer {

namespace Detail {

// Scans a null-terminated wide-character expression and accumulates tokens.
// The object keeps a pointer into the buffer of the string passed to the
// constructor, so that string has to stay alive while the tokenizer is used.
class Tokenizer {
public:
    Tokenizer(const std::wstring &expr) : m_current(expr.c_str()) {}

    // Consumes the whole expression: numbers and operators become tokens,
    // every other character (spaces included) is skipped.
    void Tokenize() {
        while(!EndOfExpression()) {
            if(IsNumber()) {
                ScanNumber();
            }
            else if(IsOperator()) {
                ScanOperator();
            }
            else {
                MoveNext();
            }
        }
    }

    // Tokens collected so far.
    const Tokens &Result() const {
        return m_result;
    }

private:
    // True once the cursor has reached the terminating null character.
    bool EndOfExpression() const {
        return *m_current == L'\0';
    }

    // A number starts with a digit.
    bool IsNumber() const {
        return iswdigit(*m_current) != 0;
    }

    // Parses a number at the cursor with wcstod and moves the cursor to the
    // first character wcstod did not consume.
    void ScanNumber() {
        wchar_t *end = nullptr;
        m_result.push_back(wcstod(m_current, &end));
        m_current = end;
    }

    // True when the character at the cursor spells one of the operators.
    bool IsOperator() const {
        auto all = { Operator::Plus, Operator::Minus, Operator::Mul,
                     Operator::Div, Operator::LParen, Operator::RParen };
        return std::any_of(all.begin(), all.end(), [this](Operator o) {
            return *m_current == static_cast<wchar_t>(o);
        });
    }

    // Stores the character at the cursor as an operator token and advances.
    void ScanOperator() {
        m_result.push_back(static_cast<Operator>(*m_current));
        MoveNext();
    }

    // Advances the cursor by one character.
    void MoveNext() {
        ++m_current;
    }

    const wchar_t *m_current;
    Tokens m_result;
};

} // namespace Detail

// Splits the expression into tokens and returns them in source order.
inline Tokens Tokenize(const std::wstring &expr) {
    Detail::Tokenizer tokenizer(expr);
    tokenizer.Tokenize();
    return tokenizer.Result();
}

} // namespace Lexer

} // namespace Interpreter
InterpreterTests.cpp #include "stdafx.h" #include "CppUnitTest.h" #include "Interpreter.h" namespace InterpreterTests { using namespace Microsoft::VisualStudio::CppUnitTestFramework; using namespace Interpreter; using namespace std; namespace AssertRange { template<class T, class ActualRange> static void AreEqual(initializer_list<T> expect, const ActualRange &actual) { auto actualIter = begin(actual); auto expectIter = begin(expect); Assert::AreEqual(distance(expectIter, end(expect)), distance(actualIter, end(actual)), L"Size differs."); for(; expectIter != end(expect) && actualIter != end(actual); ++expectIter, ++actualIter) { auto message = L"Mismatch in position " + to_wstring(distance(begin(expect), expectIter)); Assert::AreEqual<T>(*expectIter, *actualIter, message.c_str()); } } } // namespace AssertRange TEST_CLASS(LexerTests) { public: TEST_METHOD(Should_return_empty_token_list_when_put_empty_expression) { Tokens tokens = Lexer::Tokenize(L""); Assert::IsTrue(tokens.empty()); } TEST_METHOD(Should_tokenize_single_plus_operator) { Tokens tokens = Lexer::Tokenize(L"+"); AssertRange::AreEqual({ Operator::Plus }, tokens); } TEST_METHOD(Should_tokenize_single_digit) { Tokens tokens = Lexer::Tokenize(L"1"); AssertRange::AreEqual({ 1.0 }, tokens); } TEST_METHOD(Should_tokenize_floating_point_number) { Tokens tokens = Lexer::Tokenize(L"12.34"); AssertRange::AreEqual({ 12.34 }, tokens); } TEST_METHOD(Should_tokenize_plus_and_number) { Tokens tokens = Lexer::Tokenize(L"+12.34"); AssertRange::AreEqual({ Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_skip_spaces) { Tokens tokens = Lexer::Tokenize(L" 1 + 12.34 "); AssertRange::AreEqual({ Token(1.0), Token(Operator::Plus), Token(12.34) }, tokens); } TEST_METHOD(Should_tokenize_complex_experssion) { Tokens tokens = Lexer::Tokenize(L"1+2*3/(4-5)"); AssertRange::AreEqual({ Token(1), Token(Operator::Plus), Token(2), Token(Operator::Mul), Token(3), Token(Operator::Div), Token(Operator::LParen), Token(4), 
Token(Operator::Minus), Token(5), Token(Operator::RParen) }, tokens); } }; TEST_CLASS(TokenTests) { public: TEST_METHOD(Should_get_type_for_operator_token) { Token opToken(Operator::Plus); Assert::AreEqual(TokenType::Operator, opToken.Type()); } TEST_METHOD(Should_get_type_for_number_token) { Token numToken(1.2); Assert::AreEqual(TokenType::Number, numToken.Type()); } TEST_METHOD(Should_get_operator_code_from_operator_token) { Token token(Operator::Plus); Assert::AreEqual<Operator>(Operator::Plus, token); } TEST_METHOD(Should_get_number_value_from_number_token) { Token token(1.23); Assert::AreEqual<double>(1.23, token); } }; }
GitHub . , . "__".
. , .
Source: https://habr.com/ru/post/231657/
All Articles