From e015626f189dc76f8df9fdc25a47638c6a2f3feb Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 26 May 2025 21:58:17 -0700 Subject: [PATCH] MC: Allow .set to reassign non-MCConstantExpr expressions GNU Assembler supports symbol reassignment via .set, .equ, or =. However, LLVM's integrated assembler only allows reassignment for MCConstantExpr cases, as it struggles with scenarios like: ``` .data .set x, 0 .long x // reference the first instance x = .-.data .long x // reference the second instance .set x,.-.data .long x // reference the third instance ``` Between two assignments binds, we cannot ensure that a reference binds to the earlier assignment. We use MCSymbol::IsUsed and other conditions to reject potentially unsafe reassignments, but certain MCConstantExpr uses could be unsafe as well. This patch enables reassignment by cloning the symbol upon reassignment and updating the symbol table. Existing references to the original symbol remain unchanged, and the original symbol is excluded from the emitted symbol table. --- llvm/include/llvm/MC/MCContext.h | 5 ++++ llvm/include/llvm/MC/MCSymbol.h | 23 ++++++------------ llvm/lib/MC/ELFObjectWriter.cpp | 3 +-- llvm/lib/MC/MCContext.cpp | 29 ++++++++++++++++++++++ llvm/lib/MC/MCExpr.cpp | 3 --- llvm/lib/MC/MCParser/AsmParser.cpp | 28 ++++++--------------- llvm/lib/MC/MCParser/MasmParser.cpp | 10 ++++---- llvm/lib/MC/MCSymbol.cpp | 1 - llvm/test/MC/ARM/thumb_set-diagnostics.s | 27 +++++++-------------- llvm/test/MC/AsmParser/redef.s | 31 +++++++++++++++++++++--- 10 files changed, 92 insertions(+), 68 deletions(-) diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h index 73c6d57f107f..a2a53427be98 100644 --- a/llvm/include/llvm/MC/MCContext.h +++ b/llvm/include/llvm/MC/MCContext.h @@ -492,6 +492,11 @@ public: /// Get the symbol for \p Name, or null. MCSymbol *lookupSymbol(const Twine &Name) const; + /// Clone a symbol for the .set directive, replacing it in the symbol table. + /// Existing references to the original symbol remain unchanged, and the + /// original symbol is not emitted to the symbol table. + MCSymbol *cloneSymbol(MCSymbol &Sym); + /// Set value for a symbol. void setSymbolValue(MCStreamer &Streamer, const Twine &Sym, uint64_t Val); diff --git a/llvm/include/llvm/MC/MCSymbol.h b/llvm/include/llvm/MC/MCSymbol.h index 3885ce0bff37..77515c82e096 100644 --- a/llvm/include/llvm/MC/MCSymbol.h +++ b/llvm/include/llvm/MC/MCSymbol.h @@ -90,9 +90,6 @@ protected: /// True if this symbol can be redefined. unsigned IsRedefinable : 1; - /// IsUsed - True if this symbol has been used. - mutable unsigned IsUsed : 1; - mutable unsigned IsRegistered : 1; /// True if this symbol is visible outside this translation unit. Note: ELF @@ -165,9 +162,9 @@ protected: }; MCSymbol(SymbolKind Kind, const MCSymbolTableEntry *Name, bool isTemporary) - : IsTemporary(isTemporary), IsRedefinable(false), IsUsed(false), - IsRegistered(false), IsExternal(false), IsPrivateExtern(false), - IsWeakExternal(false), Kind(Kind), IsUsedInReloc(false), IsResolving(0), + : IsTemporary(isTemporary), IsRedefinable(false), IsRegistered(false), + IsExternal(false), IsPrivateExtern(false), IsWeakExternal(false), + Kind(Kind), IsUsedInReloc(false), IsResolving(0), SymbolContents(SymContentsUnset), CommonAlignLog2(0), Flags(0) { Offset = 0; HasName = !!Name; @@ -175,6 +172,9 @@ protected: getNameEntryPtr() = Name; } + MCSymbol(const MCSymbol &) = default; + MCSymbol &operator=(const MCSymbol &) = delete; + // Provide custom new/delete as we will only allocate space for a name // if we need one. void *operator new(size_t s, const MCSymbolTableEntry *Name, MCContext &Ctx); @@ -201,9 +201,6 @@ private: } public: - MCSymbol(const MCSymbol &) = delete; - MCSymbol &operator=(const MCSymbol &) = delete; - /// getName - Get the symbol name. StringRef getName() const { if (!HasName) @@ -224,9 +221,6 @@ public: /// isTemporary - Check if this is an assembler temporary symbol. bool isTemporary() const { return IsTemporary; } - /// isUsed - Check if this is used. - bool isUsed() const { return IsUsed; } - /// Check if this symbol is redefinable. bool isRedefinable() const { return IsRedefinable; } /// Mark this symbol as redefinable. @@ -306,10 +300,9 @@ public: return SymbolContents == SymContentsVariable; } - /// getVariableValue - Get the value for variable symbols. + /// Get the expression of the variable symbol. const MCExpr *getVariableValue(bool SetUsed = true) const { assert(isVariable() && "Invalid accessor!"); - IsUsed |= SetUsed; return Value; } @@ -404,7 +397,7 @@ public: return Fragment; // If the symbol is a non-weak alias, get information about // the aliasee. (Don't try to resolve weak aliases.) - Fragment = getVariableValue(false)->findAssociatedFragment(); + Fragment = getVariableValue()->findAssociatedFragment(); return Fragment; } diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp index 592081d81792..933a64d4ddc5 100644 --- a/llvm/lib/MC/ELFObjectWriter.cpp +++ b/llvm/lib/MC/ELFObjectWriter.cpp @@ -446,8 +446,7 @@ void ELFWriter::writeSymbol(SymbolTableWriter &Writer, uint32_t StringIndex, // needs. MCBinaryExpr is not handled. const MCSymbolELF *Sym = &Symbol; while (Sym->isVariable()) { - if (auto *Expr = - dyn_cast(Sym->getVariableValue(false))) { + if (auto *Expr = dyn_cast(Sym->getVariableValue())) { Sym = cast(&Expr->getSymbol()); if (!Sym->getSize()) continue; diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp index dc92acafb25d..e0e5e7bfc37d 100644 --- a/llvm/lib/MC/MCContext.cpp +++ b/llvm/lib/MC/MCContext.cpp @@ -309,6 +309,35 @@ MCSymbol *MCContext::createSymbolImpl(const MCSymbolTableEntry *Name, MCSymbol(MCSymbol::SymbolKindUnset, Name, IsTemporary); } +MCSymbol *MCContext::cloneSymbol(MCSymbol &Sym) { + MCSymbol *NewSym = nullptr; + auto Name = Sym.getNameEntryPtr(); + switch (getObjectFileType()) { + case MCContext::IsCOFF: + NewSym = new (Name, *this) MCSymbolCOFF(cast(Sym)); + break; + case MCContext::IsELF: + NewSym = new (Name, *this) MCSymbolELF(cast(Sym)); + break; + case MCContext::IsMachO: + NewSym = new (Name, *this) MCSymbolMachO(cast(Sym)); + break; + default: + reportFatalUsageError(".set redefinition is not supported"); + break; + } + // Set the name and redirect the `Symbols` entry to `NewSym`. + NewSym->getNameEntryPtr() = Name; + const_cast(Name)->second.Symbol = NewSym; + // Ensure the next `registerSymbol` call will add the new symbol to `Symbols`. + NewSym->setIsRegistered(false); + + // Ensure the original symbol is not emitted to the symbol table. + Sym.IsTemporary = true; + Sym.setExternal(false); + return NewSym; +} + MCSymbol *MCContext::createRenamableSymbol(const Twine &Name, bool AlwaysAddSuffix, bool IsTemporary) { diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp index 6c9fc94a5fdf..70c9c5cf07bc 100644 --- a/llvm/lib/MC/MCExpr.cpp +++ b/llvm/lib/MC/MCExpr.cpp @@ -479,8 +479,6 @@ static bool canExpand(const MCSymbol &Sym, bool InSet) { if (Sym.isWeakExternal()) return false; - Sym.getVariableValue(true); - if (InSet) return true; return !Sym.isInSection(); @@ -508,7 +506,6 @@ bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm, Asm->getContext().reportError( Sym.getVariableValue()->getLoc(), "cyclic dependency detected for symbol '" + Sym.getName() + "'"); - Sym.IsUsed = false; Sym.setVariableValue(MCConstantExpr::create(0, Asm->getContext())); } return false; diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 8019953f5fee..4e56931387ed 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -1231,14 +1231,14 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc, // If this is an absolute variable reference, substitute it now to preserve // semantics in the face of reassignment. if (Sym->isVariable()) { - auto V = Sym->getVariableValue(/*SetUsed*/ false); + auto V = Sym->getVariableValue(); bool DoInline = isa(V) && !Variant; if (auto TV = dyn_cast(V)) DoInline = TV->inlineAssignedExpr(); if (DoInline) { if (Variant) return Error(EndLoc, "unexpected modifier on variable reference"); - Res = Sym->getVariableValue(/*SetUsed*/ false); + Res = Sym->getVariableValue(); return false; } } @@ -6332,11 +6332,6 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef, SMLoc EqualLoc = Parser.getTok().getLoc(); if (Parser.parseExpression(Value)) return Parser.TokError("missing expression"); - - // Note: we don't count b as used in "a = b". This is to allow - // a = b - // b = c - if (Parser.parseEOL()) return true; @@ -6344,22 +6339,13 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef, // used as a symbol, or it is an absolute symbol). Sym = Parser.getContext().lookupSymbol(Name); if (Sym) { - // Diagnose assignment to a label. - // - // FIXME: Diagnostics. Note the location of the definition as a label. - // FIXME: Diagnose assignment to protected identifier (e.g., register name). if (!Sym->isUnset() && (!allow_redef || !Sym->isRedefinable())) return Parser.Error(EqualLoc, "redefinition of '" + Name + "'"); - else if (Sym->isUndefined() && !Sym->isUsed() && !Sym->isVariable()) - ; // Allow redefinitions of undefined symbols only used in directives. - else if (Sym->isVariable() && !Sym->isUsed() && allow_redef) - ; // Allow redefinitions of variables that haven't yet been used. - else if (!Sym->isVariable()) - return Parser.Error(EqualLoc, "invalid assignment to '" + Name + "'"); - else if (!isa(Sym->getVariableValue())) - return Parser.Error(EqualLoc, - "invalid reassignment of non-absolute variable '" + - Name + "'"); + // If the symbol is redefinable, clone it and update the symbol table + // to the new symbol. Existing references to the original symbol remain + // unchanged. + if (Sym->isRedefinable()) + Sym = Parser.getContext().cloneSymbol(*Sym); } else if (Name == ".") { Parser.getStreamer().emitValueToOffset(Value, 0, EqualLoc); return false; diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index c259de61aab4..c596006ba9a0 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -1486,12 +1486,12 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc, // If this is an absolute variable reference, substitute it now to preserve // semantics in the face of reassignment. if (Sym->isVariable()) { - auto V = Sym->getVariableValue(/*SetUsed=*/false); + auto V = Sym->getVariableValue(); bool DoInline = isa(V); if (auto TV = dyn_cast(V)) DoInline = TV->inlineAssignedExpr(); if (DoInline) { - Res = Sym->getVariableValue(/*SetUsed=*/false); + Res = Sym->getVariableValue(); return false; } } @@ -3012,9 +3012,9 @@ bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name, MCSymbol *Sym = getContext().getOrCreateSymbol(Var.Name); const MCConstantExpr *PrevValue = - Sym->isVariable() ? dyn_cast_or_null( - Sym->getVariableValue(/*SetUsed=*/false)) - : nullptr; + Sym->isVariable() + ? dyn_cast_or_null(Sym->getVariableValue()) + : nullptr; if (Var.IsText || !PrevValue || PrevValue->getValue() != Value) { switch (Var.Redefinable) { case Variable::NOT_REDEFINABLE: diff --git a/llvm/lib/MC/MCSymbol.cpp b/llvm/lib/MC/MCSymbol.cpp index 3ca85b76a35d..100f693a739e 100644 --- a/llvm/lib/MC/MCSymbol.cpp +++ b/llvm/lib/MC/MCSymbol.cpp @@ -45,7 +45,6 @@ void *MCSymbol::operator new(size_t s, const MCSymbolTableEntry *Name, } void MCSymbol::setVariableValue(const MCExpr *Value) { - assert(!IsUsed && "Cannot set a variable that has already been used."); assert(Value && "Invalid variable value!"); assert((SymbolContents == SymContentsUnset || SymbolContents == SymContentsVariable) && diff --git a/llvm/test/MC/ARM/thumb_set-diagnostics.s b/llvm/test/MC/ARM/thumb_set-diagnostics.s index 4c8c927f5dba..6872ce2b29c3 100644 --- a/llvm/test/MC/ARM/thumb_set-diagnostics.s +++ b/llvm/test/MC/ARM/thumb_set-diagnostics.s @@ -1,11 +1,12 @@ -@ RUN: not llvm-mc -triple armv7-eabi -o /dev/null %s 2>&1 | FileCheck %s -@ RUN: not llvm-mc -filetype=obj -triple=armv7-eabi -o /dev/null %s --defsym LOOP=1 2>&1 | FileCheck %s --check-prefix=ERR2 --implicit-check-not=error: +@ RUN: rm -rf %t && split-file %s %t --leading-lines && cd %t +@ RUN: not llvm-mc -triple armv7-eabi -o /dev/null a.s 2>&1 | FileCheck %s +@ RUN: not llvm-mc -filetype=obj -triple=armv7-eabi -o /dev/null redef.s 2>&1 | FileCheck %s --check-prefix=ERR +@ RUN: not llvm-mc -filetype=obj -triple=armv7-eabi -o /dev/null cycle.s 2>&1 | FileCheck %s --check-prefix=ERR2 --implicit-check-not=error: +//--- a.s .syntax unified .thumb - -.ifndef LOOP .thumb_set @ CHECK: error: expected identifier after '.thumb_set' @@ -42,29 +43,20 @@ @ CHECK: .thumb_set trailer_trash, 0x11fe1e55, @ CHECK: ^ +//--- redef.s .type alpha,%function alpha: nop - .type beta,%function beta: - bkpt - - .thumb_set beta, alpha - -@ CHECK: error: redefinition of 'beta' -@ CHECK: .thumb_set beta, alpha -@ CHECK: ^ +.thumb_set beta, alpha +@ ERR: [[#@LINE-1]]:18: error: redefinition of 'beta' variable_result = alpha + 1 .long variable_result .thumb_set variable_result, 1 -@ CHECK: error: invalid reassignment of non-absolute variable 'variable_result' -@ CHECK: .thumb_set variable_result, 1 -@ CHECK: ^ - -.else +//--- cycle.s .type recursive_use,%function .thumb_set recursive_use, recursive_use + 1 @ ERR2: [[#@LINE-1]]:41: error: cyclic dependency detected for symbol 'recursive_use' @@ -74,4 +66,3 @@ beta: .thumb_set recursive_use2, recursive_use2 + 1 @ ERR2: [[#@LINE-1]]:43: error: cyclic dependency detected for symbol 'recursive_use2' @ ERR2: [[#@LINE-2]]:43: error: expression could not be evaluated -.endif diff --git a/llvm/test/MC/AsmParser/redef.s b/llvm/test/MC/AsmParser/redef.s index 56a13b10169e..166cedf3725c 100644 --- a/llvm/test/MC/AsmParser/redef.s +++ b/llvm/test/MC/AsmParser/redef.s @@ -1,4 +1,17 @@ -# RUN: not llvm-mc -filetype=obj -triple=x86_64 %s -o /dev/null 2>&1 | FileCheck %s --implicit-check-not=error: +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t +# RUN: llvm-objdump -ts %t | FileCheck %s + +# CHECK: SYMBOL TABLE: +# CHECK-NEXT: 0000000000000000 l .text 0000000000000000 l +# CHECK-NEXT: 0000000000000008 l *ABS* 0000000000000000 x +# CHECK-NEXT: 0000000000000002 l *ABS* 0000000000000000 b +# CHECK-NEXT: 0000000000000003 l *ABS* 0000000000000000 c +# CHECK-NEXT: ffffffffffffffff l *ABS* 0000000000000000 a +# CHECK-NEXT: 0000000000000000 g .text 0000000000000000 l_v +# CHECK: Contents of section .data: +# CHECK-NEXT: 0000 00000000 04000000 08000000 . +# CHECK-NEXT: Contents of section .data1: +# CHECK-NEXT: 0000 010203ff 0203 ...... l: @@ -8,11 +21,23 @@ l: x = .-.data .long x .set x,.-.data -# CHECK: [[#@LINE-1]]:8: error: invalid reassignment of non-absolute variable 'x' -## TODO This should be allowed .long x .globl l_v .set l_v, l .globl l_v .set l_v, l + +.section .data1,"aw" +.equiv b, 2*a +.set a, 1 +.equiv c, 3*a + +.if b > a +.byte a, b, c +.endif + +.set a, -a +.if b > a +.byte a, b, c +.endif