snprintf(buf, 16, "%s", str)
, where str
is a very long string; this behavior was of no use to me. FIND_PACKAGE(LLVM REQUIRED) SET(CLANGPLUGINSRC plugin.cc printf_check.cc) ADD_LIBRARY(rspamd-clang SHARED ${CLANGPLUGINSRC}) SET_TARGET_PROPERTIES(rspamd-clang PROPERTIES COMPILE_FLAGS "${LLVM_CXX_FLAGS} ${LLVM_CPP_FLAGS} ${LLVM_C_FLAGS}" INCLUDE_DIRECTORIES ${LIBCLANG_INCLUDE_DIR} LINKER_LANGUAGE CXX) TARGET_LINK_LIBRARIES(rspamd-clang ${LIBCLANG_LIBRARIES}) LINK_DIRECTORIES(${LLVM_LIBRARY_DIRS})
-Xclang opt1 -Xclang opt2
into -Xclang opt1 opt2
, which completely broke the compilation. I found a way out by directly setting CMAKE_C_FLAGS
: IF (ENABLE_CLANG_PLUGIN MATCHES "ON") SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Xclang -load -Xclang ${CMAKE_CURRENT_BINARY_DIR}/../clang-plugin/librspamd-clang.so -Xclang -add-plugin -Xclang rspamd-ast") ENDIF ()
-Xclang -plugin
, as recommended in almost all examples, clang stops compiling the source code (that is, it does not generate object files), performing only an analysis. The way out was to replace -Xclang -plugin
with -Xclang -add-plugin
, which I found after some meditation over Google search results. The entry point is clang::FrontendPluginRegistry::Add
, which registers the plugin for clang. This method is a template method, and it accepts a class type that inherits from clang::PluginASTAction
and defines the necessary methods in it: class RspamdASTAction : public PluginASTAction { protected: std::unique_ptr <ASTConsumer> CreateASTConsumer (CompilerInstance &CI, llvm::StringRef) override { return llvm::make_unique<RspamdASTConsumer> (CI); } bool ParseArgs (const CompilerInstance &CI, const std::vector <std::string> &args) override { return true; } void PrintHelp (llvm::raw_ostream &ros) { ros << "Nothing here\n"; } }; static FrontendPluginRegistry::Add <rspamd::RspamdASTAction> X ("rspamd-ast", "rspamd ast checker");
CreateASTConsumer
method, which tells clang that the resulting object must be invoked once the compiler has translated the code into a syntax tree. All further work is carried out in ASTConsumer, which in turn defines the HandleTranslationUnit
method, which, in fact, gets the context of the syntax tree. CompilerInstance
used to control the compiler, for example, to generate errors and warnings, which is extremely convenient when working with a plugin. The entire ASTConsumer is described as: class RspamdASTConsumer : public ASTConsumer { CompilerInstance &Instance; public: RspamdASTConsumer (CompilerInstance &Instance) : Instance (Instance) { } void HandleTranslationUnit (ASTContext &context) override { rspamd::PrintfCheckVisitor v(&context, Instance); v.TraverseDecl (context.getTranslationUnitDecl ()); } };
// AST visitor that hooks function-call expressions via VisitCallExpr.
// The implementation is hidden behind a pimpl: `impl` is only forward-declared
// here, so its definition is not visible in this declaration.
class PrintfCheckVisitor : public clang::RecursiveASTVisitor<PrintfCheckVisitor> {
	// Forward declaration of the private implementation class.
	class impl;
	// Owning pointer to the implementation object.
	std::unique_ptr<impl> pimpl;
public:
	// Takes the AST context of the translation unit and the compiler instance.
	PrintfCheckVisitor (clang::ASTContext *ctx, clang::CompilerInstance &ci);
	// Declared here and defined elsewhere; std::unique_ptr<impl> needs `impl`
	// to be a complete type at the point where the destructor is defined.
	virtual ~PrintfCheckVisitor (void);
	// Visitor hook for call expressions.
	// NOTE(review): presumably forwards to pimpl — confirm against the definition.
	bool VisitCallExpr (clang::CallExpr *E);
};
clang::RecursiveASTVisitor
, which performs tree traversal, and the definition of the VisitCallExpr
method, which is called when it is in a function call tree. In this method (proxied in pimpl), the main work is performed on parsing functions and their arguments. The method starts like this: bool VisitCallExpr (CallExpr *E) { auto callee = dyn_cast<NamedDecl> (E->getCalleeDecl ()); if (callee == NULL) { llvm::errs () << "Bad callee\n"; return false; } auto fname = callee->getNameAsString (); auto pos_it = printf_functions.find (fname); if (pos_it != printf_functions.end ()) {
printf_functions
hash, whether this function is of interest to us: printf_functions = { {"rspamd_printf", 0}, {"rspamd_default_log_function", 4}, {"rspamd_snprintf", 2}, {"rspamd_fprintf", 1} };
const auto args = E->getArgs (); auto pos = pos_it->second; auto query = args[pos]; if (!query->isEvaluatable (*pcontext)) { print_warning (std::string ("cannot evaluate query"), E, this->pcontext, this->ci); return false; } clang::Expr::EvalResult r; if (!query->EvaluateAsRValue (r, *pcontext)) { print_warning (std::string ("cannot evaluate rvalue of query"), E, this->pcontext, this->ci); return false; } auto qval = dyn_cast<StringLiteral> ( r.Val.getLValueBase ().get<const Expr *> ()); if (!qval) { print_warning (std::string ("bad or absent query string"), E, this->pcontext, this->ci); return false; }
StringLiteral
. If the calculation is not needed, then you can take Expr *
directly and cast it to StringLiteral
, which greatly simplifies the code. struct PrintfArgChecker { private: arg_parser_t parser; public: int width; int precision; bool is_unsigned; ASTContext *past; CompilerInstance *pci; PrintfArgChecker (arg_parser_t _p, ASTContext *_ast, CompilerInstance *_ci) : parser (_p), past (_ast), pci(_ci) { width = 0; precision = 0; is_unsigned = false; } virtual ~PrintfArgChecker () { } bool operator() (const Expr *e) { return parser (e, this); } };
Expr *
) and checks its type against the specified one. Then we just check all the arguments after the query string for the types: if (parsers->size () != E->getNumArgs () - (pos + 1)) { std::ostringstream err_buf; err_buf << "number of arguments for " << fname << " missmatches query string '" << qval->getString ().str () << "', expected " << parsers->size () << " args" << ", got " << (E->getNumArgs () - (pos + 1)) << " args"; print_error (err_buf.str (), E, this->pcontext, this->ci); return false; } else { for (auto i = pos + 1; i < E->getNumArgs (); i++) { auto arg = args[i]; if (arg) { if (!parsers->at (i - (pos + 1)) (arg)) { return false; } } } }
print_error
function print_error
is interesting because it can print a compilation error and stop the compilation process. This is done through CompilerInstance
, but in a rather non-obvious way: static void print_error (const std::string &err, const Expr *e, const ASTContext *ast, CompilerInstance *ci) { auto loc = e->getExprLoc (); auto &diag = ci->getDiagnostics (); auto id = diag.getCustomDiagID (DiagnosticsEngine::Error, "format query error: %0"); diag.Report (loc, id) << err; }
DiagnosticsEngine::Warning
.clang::BuiltinType::Kind
, which defines all types known to Clang. The possible values can be found in /usr/include/clang/AST/BuiltinTypes.def
(for Linux). There are two subtleties:if (sizeof (int32_t) == sizeof (int)) {...} if (sizeof (int32_t) == sizeof (long)) {...}
typedef my_int int
static bool check_builtin_type (const Expr *arg, struct PrintfArgChecker *ctx, const std::vector <BuiltinType::Kind> &k, const std::string &fmt) { auto type = arg->getType ().split ().Ty; auto desugared_type = type->getUnqualifiedDesugaredType (); if (!desugared_type->isBuiltinType ()) { print_error ( std::string ("not a builtin type for ") + fmt + " arg: " + arg->getType ().getAsString (), arg, ctx->past, ctx->pci); return false; } auto builtin_type = dyn_cast<BuiltinType> (desugared_type); auto kind = builtin_type->getKind (); auto found = false; for (auto kk : k) { if (kind == kk) { found = true; break; } } if (!found) { print_error ( std::string ("bad argument for ") + fmt + " arg: " + arg->getType ().getAsString () + ", resolved as: " + builtin_type->getNameAsCString (ctx->past->getPrintingPolicy ()), arg, ctx->past, ctx->pci); return false; } return true; }
getUnqualifiedDesugaredType
method is used to remove aliases, and the arg->getType()
method is used to get the type of expression from the expression. But this method returns the qualified type (for example, including the const
specifier), which is not needed for this task, so the split
method is used to split the type
, and only the pure type is taken from the resulting structure. static bool check_struct_type (const Expr *arg, struct PrintfArgChecker *ctx, const std::string &sname, const std::string &fmt) { auto type = arg->getType ().split ().Ty; if (!type->isPointerType ()) { print_error ( std::string ("bad string argument for %s: ") + arg->getType ().getAsString (), arg, ctx->past, ctx->pci); return false; } auto ptr_type = type->getPointeeType ().split ().Ty; auto desugared_type = ptr_type->getUnqualifiedDesugaredType (); if (!desugared_type->isRecordType ()) { print_error ( std::string ("not a record type for ") + fmt + " arg: " + arg->getType ().getAsString (), arg, ctx->past, ctx->pci); return false; } auto struct_type = desugared_type->getAsStructureType (); auto struct_decl = struct_type->getDecl (); auto struct_def = struct_decl->getNameAsString (); if (struct_def != sname) { print_error (std::string ("bad argument '") + struct_def + "' for " + fmt + " arg: " + arg->getType ().getAsString (), arg, ctx->past, ctx->pci); return false; } return true; }
type->getPointeeType().split().Ty
. Then we perform desugaring and find the type declaration: struct_type->getDecl()
. After that, the checks are done in a rather trivial way. [44%] Building C object src/CMakeFiles/rspamd-server.dir/libutil/map.c.o src/libutil/map.c:906:46: error: format query error: bad argument for %z arg: guint, resolved as: unsigned int msg_info_pool ("read hash of %z elements", g_hash_table_size ^ src/libutil/logger.h:190:9: note: expanded from macro 'msg_info_pool' __VA_ARGS__) ^ 1 error generated.
[45%] Building C object src/CMakeFiles/rspamd-server.dir/libserver/protocol.c.o src/libserver/protocol.c:373:45: error: format query error: bad argument 'f_str_tok' for %V arg: rspamd_ftok_t * msg_err_task ("bad from header: '%V'", h->value); ^ src/libutil/logger.h:164:9: note: expanded from macro 'msg_err_task' __VA_ARGS__) ^ 1 error generated. [44%] Building C object src/CMakeFiles/rspamd-server.dir/libstat/tokenizers/osb.c.o src/libstat/tokenizers/osb.c:128:48: error: format query error: bad string argument for %s: gsize msg_warn ("siphash key is too short: %s", keylen); ^ src/libutil/logger.h:145:9: note: expanded from macro 'msg_warn' __VA_ARGS__) ^ 1 error generated.
[46%] Building C object src/CMakeFiles/rspamd-server.dir/libmime/mime_expressions.c.o src/libmime/mime_expressions.c:780:3: error: format query error: number of arguments for rspamd_default_log_function missmatches query string 'process test regexp %s for url %s returned FALSE', expected 2 args, got 1 args msg_info_task ("process test regexp %s for url %s returned FALSE", ^ src/libutil/logger.h:169:30: note: expanded from macro 'msg_info_task' #define msg_info_task(...) rspamd_default_log_function (G_LOG_LEVEL_INFO, \ ^ 1 error generated.
Source: https://habr.com/ru/post/270743/
All Articles