nim-lang · Araq · Dec 9, 2022 · Dec 8, 2022 · Dec 8, 2022 · Dec 8, 2022
diff --git a/.gitignore b/.gitignore
@@ -1,6 +1,7 @@
 nimcache
 testsuite/tests/*.nim
 testsuite/cppkeepbodies/*.nim
+testsuite/cextras/*.nim
 testsuite/tester
 *.exe
 /c2nim
diff --git a/c2nim.nim b/c2nim.nim
@@ -7,7 +7,7 @@
 #    distribution, for details about the copyright.
 #
 
-import std / [strutils, os, times, parseopt, strscans]
+import std / [strutils, os, times, md5, parseopt, strscans]
 
 import compiler/ [llstream, ast, renderer, options, msgs, nversion]
 
@@ -43,6 +43,10 @@ Options:
   --importc              annotate procs with ``{.importc.}``
   --importdefines        import C defines as procs or vars with ``{.importc.}``
   --importfuncdefines    import C define funcs as procs with ``{.importc.}``
+  --def:SYM='macro()'    define a C macro that gets replaced with the given
+                         definition. It's parsed by the lexer. Use it to fix
+                         function attributes: ``--def:PUBLIC='__attribute__ ()'``
+  --reordercomments      reorder C comments to match Nim's postfix style
   --ref                  convert typ* to ref typ (default: ptr typ)
   --prefix:PREFIX        strip prefix for the generated Nim identifiers
                          (multiple --prefix options are supported)
@@ -52,6 +56,7 @@ Options:
                          for example `--mangle:'{u?}int{\d+}_t=$1int$2'` to
                          convert C <stdint.h> to Nim equivalents
                          (multiple --mangle options are supported)
+  --stdints              Mangle C stdint's into Nim style int's
   --paramprefix:PREFIX   add prefix to parameter name of the generated Nim proc
   --assumedef:IDENT      skips #ifndef sections for the given C identifier
                          (multiple --assumedef options are supported)
@@ -87,7 +92,10 @@ proc parse(infile: string, options: PParserOptions; dllExport: var PNode): PNode
   var p: Parser
   if isCpp: options.flags.incl pfCpp
   openParser(p, infile, stream, options)
-  result = parseUnit(p).postprocess(pfStructStruct in options.flags)
+  result = parseUnit(p).postprocess(
+    structStructMode = pfStructStruct in options.flags,
+    reorderComments = pfReorderComments in options.flags
+  )
   closeParser(p)
   if isCpp: options.flags.excl pfCpp
   if options.exportPrefix.len > 0:
@@ -100,18 +108,52 @@ proc parse(infile: string, options: PParserOptions; dllExport: var PNode): PNode
 
 proc isC2nimFile(s: string): bool = splitFile(s).ext.toLowerAscii == ".c2nim"
 
+proc parseDefines(val: string): seq[ref Token] =
+  ## Runs the C lexer over `val` and returns the tokens it produces, not
+  ## including the terminating `pxEof` token.
+  ##
+  ## `openLexer` is handed the path of a scratch header created in the temp
+  ## directory (named after the MD5 of `val`); the text that is lexed comes
+  ## from an in-memory stream over `val`. The scratch file is closed and
+  ## removed even when lexing raises.
+  ##
+  ## Raises `ValueError` ("parse error") once more than 1000 tokens have
+  ## been collected, as a guard against a lexer that never reaches EOF.
+  const maxTokens = 1_000
+  let tmpPath = getTempDir() / "macro_" & getMD5(val) & ".h"
+  let tmpFile = open(tmpPath, fmReadWrite)
+  try:
+    var lex: Lexer
+    openLexer(lex, tmpPath, llStreamOpen(val))
+    var tk = new Token
+    result = newSeq[ref Token]()
+    while tk.xkind != pxEof:
+      # every token gets its own heap cell because the result keeps refs
+      tk = new Token
+      lex.getTok(tk[])
+      if tk.xkind == pxEof:
+        break
+      result.add tk
+      if result.len > maxTokens:
+        raise newException(ValueError, "parse error")
+  finally:
+    # always release the scratch file, also on the error paths above
+    tmpFile.close()
+    tmpPath.removeFile()
+
+proc parseDefineArgs(parserOptions: var PParserOptions, val: string) =
+  ## Handles the value of one `--def:SYM=BODY` option: builds a
+  ## `cparser.Macro` from it and appends it to `parserOptions.macros`.
+  ##
+  ## `val` is split at the first `=` only, so the replacement text may
+  ## itself contain `=`. The part before it is lexed as the macro head
+  ## (name, optionally followed by a parameter list), the part after it
+  ## as the macro body.
+  ##
+  ## Raises `ValueError` when `val` has no `=` or the head yields no tokens.
+  let defs = val.split("=", maxsplit = 1)
+  if defs.len != 2:
+    raise newException(ValueError,
+      "--def expects SYM=DEFINITION, but got: " & val)
+  let macs = parseDefines(defs[0])
+  if macs.len == 0:
+    raise newException(ValueError,
+      "--def is missing the macro name: " & val)
+  var mc: cparser.Macro
+  mc.name = macs[0].s
+  mc.params = -1 # stays -1 when the head has no parentheses at all
+  mc.body = parseDefines(defs[1])
+  for m in macs[1..^1]:
+    # an opening parenthesis starts the count at 0; each symbol adds one
+    if m.xkind == pxParLe: mc.params = 0
+    if m.xkind == pxSymbol: inc mc.params
+  parserOptions.macros.add(mc)
+
+
 var dummy: PNode
 
 when not compiles(renderModule(dummy, "")):
   # newer versions of 'renderModule' take 2 parameters. We workaround this
   # problem here:
-  proc renderModule(tree: PNode; filename: string) =
-    renderModule(tree, filename, filename)
+  proc renderModule(tree: PNode; filename: string, renderFlags: TRenderFlags) =
+    renderModule(tree, filename, filename, renderFlags)
 
-proc myRenderModule(tree: PNode; filename: string) =
+proc myRenderModule(tree: PNode; filename: string, renderFlags: TRenderFlags) =
   # also ensure we produced no trailing whitespace:
   let tmpFile = filename & ".tmp"
-  renderModule(tree, tmpFile)
+  renderModule(tree, tmpFile, renderFlags)
 
   let b = readFile(tmpFile)
   removeFile(tmpFile)
@@ -149,21 +191,21 @@ proc main(infiles: seq[string], outfile: var string,
       if not isC2nimFile(infile):
         if outfile.len == 0: outfile = changeFileExt(infile, "nim")
         for n in m: tree.add(n)
-    myRenderModule(tree, outfile)
+    myRenderModule(tree, outfile, options.renderFlags)
   else:
     for infile in infiles:
       let m = parse(infile, options, dllexport)
       if not isC2nimFile(infile):
         if outfile.len > 0:
-          myRenderModule(m, outfile)
+          myRenderModule(m, outfile, options.renderFlags)
           outfile = ""
         else:
           let outfile = changeFileExt(infile, "nim")
-          myRenderModule(m, outfile)
+          myRenderModule(m, outfile, options.renderFlags)
   if dllexport != nil:
     let (path, name, _) = infiles[0].splitFile
     let outfile = path / name & "_dllimpl" & ".nim"
-    myRenderModule(dllexport, outfile)
+    myRenderModule(dllexport, outfile, options.renderFlags)
   when declared(NimCompilerApiVersion):
     rawMessage(gConfig, hintSuccessX, [$gLinesCompiled, $(getTime() - start),
                               formatSize(getTotalMem()), ""])
@@ -195,8 +237,13 @@ for kind, key, val in getopt():
            " use a list of files and --concat instead"
     of "exportdll":
       parserOptions.exportPrefix = val
+    of "def":
+      parserOptions.parseDefineArgs(val)
     else:
-      if not parserOptions.setOption(key, val):
+      if key.normalize == "render":
+        if not parserOptions.renderFlags.setOption(val):
+          quit("[Error] unknown option: " & key)
+      elif not parserOptions.setOption(key, val):
         quit("[Error] unknown option: " & key)
   of cmdEnd: assert(false)
 if infiles.len == 0:

diff --git a/clexer.nim b/clexer.nim
@@ -107,6 +107,7 @@ type
     base*: NumericalBase      # the numerical base; only valid for int
                               # or float literals
     next*: ref Token          # for C we need arbitrary look-ahead :-(
+    lineNumber*: int          # line number
 
   Lexer* = object of TBaseLexer
     fileIdx*: (when declared(FileIndex): FileIndex else: int32)
@@ -578,8 +579,10 @@ proc scanLineComment(L: var Lexer, tok: var Token) =
   tok.xkind = pxLineComment
   var col = getColNumber(L, pos)
   while true:
-    inc(pos, 2)               # skip //
-    #add(tok.s, '#')
+    # FIXME: this should be inc(pos, 3) to not double count space?
+    inc(pos, 2) # skip //
+    if buf[pos] == '/':
+      inc(pos, 1) # skip /// 
     while buf[pos] notin {CR, LF, nimlexbase.EndOfFile}:
       add(tok.s, buf[pos])
       inc(pos)
@@ -601,6 +604,9 @@ proc scanStarComment(L: var Lexer, tok: var Token) =
   var buf = L.buf
   tok.s = ""
   tok.xkind = pxStarComment
+  # skip initial /** 
+  if buf[pos] == '*' and buf[pos] != '/':
+    inc(pos)
   while true:
     case buf[pos]
     of CR, LF:
@@ -613,7 +619,6 @@ proc scanStarComment(L: var Lexer, tok: var Token) =
       #  */
       let oldPos = pos
       while buf[pos] in {' ', '\t'}:
-        #add(tok.s, ' ')
         inc(pos)
       if buf[pos] == '*':
         if buf[pos+1] != '/':
@@ -750,6 +755,7 @@ proc getTok*(L: var Lexer, tok: var Token) =
   skip(L, tok)
   if tok.xkind == pxNewLine: return
   var c = L.buf[L.bufpos]
+  tok.lineNumber = L.lineNumber
   if c in SymStartChars:
     getSymbol(L, tok)
     if L.buf[L.bufpos] == '"':

diff --git a/compiler/renderer.nim b/compiler/renderer.nim
@@ -15,7 +15,7 @@ import
 type
   TRenderFlag* = enum
     renderNone, renderNoBody, renderNoComments, renderDocComments,
-    renderNoPragmas, renderIds, renderNoProcDefs, renderSyms
+    renderNoPragmas, renderIds, renderNoProcDefs, renderSyms, renderExtraNewLines
   TRenderFlags* = set[TRenderFlag]
   TRenderTok* = object
     kind*: TTokType
@@ -43,6 +43,12 @@ type
     fid*: FileIndex
     config*: ConfigRef
 
+proc setOption*(renderOptions: var TRenderFlags, val: string): bool =
+  ## Switches on the render flag named by `val` (compared after
+  ## `normalize`). The only recognised name is "extranewlines", which
+  ## includes `renderExtraNewLines` in `renderOptions`.
+  ##
+  ## Returns true when `val` was recognised; otherwise returns false and
+  ## leaves `renderOptions` unchanged.
+  result = val.normalize == "extranewlines"
+  if result:
+    renderOptions.incl renderExtraNewLines
+
 # We render the source code in a two phases: The first
 # determines how long the subtree will likely be, the second
 # phase appends to a buffer that will be the output.
@@ -1281,6 +1287,7 @@ proc gsub(g: var TSrcGen, n: PNode, c: TContext) =
   of nkStaticStmt: gstaticStmt(g, n)
   of nkAsmStmt: gasm(g, n)
   of nkProcDef:
+    if renderExtraNewLines in g.flags: putNL(g)
     if renderNoProcDefs notin g.flags: putWithSpace(g, tkProc, "proc")
     gproc(g, n)
   of nkFuncDef:

diff --git a/cparser.nim b/cparser.nim
@@ -46,21 +46,23 @@ type
     pfIgnoreRValueRefs, ## transform C++'s 'T&&' to 'T'
     pfKeepBodies,       ## do not skip C++ method bodies
     pfAssumeIfIsTrue,   ## assume #if is true
-    pfStructStruct      ## do not treat struct Foo Foo as a forward decl
+    pfStructStruct,     ## do not treat struct Foo Foo as a forward decl
+    pfReorderComments   ## reorder C comments to match Nim's postfix style
 
-  Macro = object
-    name: string
-    params: int # number of parameters; 0 for empty (); -1 for no () at all
-    body: seq[ref Token] # can contain pxMacroParam tokens
+  Macro* = object
+    name*: string
+    params*: int # number of parameters; 0 for empty (); -1 for no () at all
+    body*: seq[ref Token] # can contain pxMacroParam tokens
 
   ParserOptions = object ## shared parser state!
     flags*: set[ParserFlag]
+    renderFlags*: TRenderFlags
     prefixes, suffixes: seq[string]
     assumeDef, assumenDef: seq[string]
     mangleRules: seq[tuple[pattern: Peg, frmt: string]]
     privateRules: seq[Peg]
     dynlibSym, headerOverride: string
-    macros: seq[Macro]
+    macros*: seq[Macro]
     toMangle: StringTableRef
     classes: StringTableRef
     toPreprocess: StringTableRef
@@ -127,6 +129,7 @@ proc newParserOptions*(): PParserOptions =
     privateRules: @[],
     discardablePrefixes: @[],
     flags: {},
+    renderFlags: {},
     dynlibSym: "",
     headerOverride: "",
     toMangle: newStringTable(modeCaseSensitive),
@@ -166,6 +169,9 @@ proc setOption*(parserOptions: PParserOptions, key: string, val=""): bool =
   of "mangle":
     let vals = val.split("=")
     parserOptions.mangleRules.add((parsePeg(vals[0]), vals[1]))
+  of "stdints":
+    let vals = (r"{u?}int{\d+}_t", r"$1int$2")
+    parserOptions.mangleRules.add((parsePeg(vals[0]), vals[1]))
   of "skipinclude": incl(parserOptions.flags, pfSkipInclude)
   of "typeprefixes": incl(parserOptions.flags, pfTypePrefixes)
   of "skipcomments": incl(parserOptions.flags, pfSkipComments)
@@ -182,6 +188,7 @@ proc setOption*(parserOptions: PParserOptions, key: string, val=""): bool =
   of "assumeifistrue": incl(parserOptions.flags, pfAssumeIfIsTrue)
   of "discardableprefix": parserOptions.discardablePrefixes.add(val)
   of "structstruct": incl(parserOptions.flags, pfStructStruct)
+  of "reordercomments": incl(parserOptions.flags, pfReorderComments)
   of "isarray": parserOptions.isArray[val] = "true"
   else: result = false
 
@@ -202,6 +209,7 @@ proc parMessage(p: Parser, msg: TMsgKind, arg = "") =
   lexMessage(p.lex, msg, arg)
 
 proc parError(p: Parser, arg = "") =
+  # raise newException(Exception, arg)
   if p.backtrackB.len == 0:
     lexMessage(p.lex, errGenerated, arg)
   else:
@@ -249,7 +257,8 @@ proc findMacro(p: Parser): int =
 
 proc rawEat(p: var Parser, xkind: Tokkind) =
   if p.tok.xkind == xkind: rawGetTok(p)
-  else: parError(p, "token expected: " & tokKindToStr(xkind))
+  else:
+    parError(p, "token expected: " & tokKindToStr(xkind))
 
 proc parseMacroArguments(p: var Parser): seq[seq[ref Token]] =
   result = @[]
@@ -354,6 +363,7 @@ proc skipComAux(p: var Parser, n: PNode) =
     if pfSkipComments notin p.options.flags:
       if n.comment.len == 0: n.comment = p.tok.s
       else: add(n.comment, "\n" & p.tok.s)
+      n.info.line = p.tok.lineNumber.uint16
   else:
     parMessage(p, warnCommentXIgnored, p.tok.s)
   getTok(p)
@@ -370,6 +380,7 @@ proc getTok(p: var Parser, n: PNode) =
 
 proc expectIdent(p: Parser) =
   if p.tok.xkind != pxSymbol:
+    # raise newException(Exception, "error")
     parError(p, "identifier expected, but got: " & debugTok(p.lex, p.tok[]))
 
 proc eat(p: var Parser, xkind: Tokkind, n: PNode) =
@@ -397,7 +408,9 @@ proc addSon(father, a, b, c: PNode) =
   addSon(father, c)
 
 proc newNodeP(kind: TNodeKind, p: Parser): PNode =
-  result = newNodeI(kind, getLineInfo(p.lex))
+  var info = getLineInfo(p.lex)
+  info.line = p.tok.lineNumber.uint16
+  result = newNodeI(kind, info)
 
 proc newNumberNodeP(kind: TNodeKind, number: string, p: Parser): PNode =
   result = newNodeP(kind, p)
@@ -487,7 +500,7 @@ proc declKeyword(p: Parser, s: string): bool =
       "restrict", "inline", "__inline", "__cdecl", "__stdcall", "__syscall",
       "__fastcall", "__safecall", "void", "struct", "union", "enum", "typedef",
       "size_t", "short", "int", "long", "float", "double", "signed", "unsigned",
-      "char", "__declspec":
+      "char", "__declspec", "__attribute__":
     result = true
   of "class", "mutable", "constexpr", "consteval", "constinit", "decltype":
     result = p.options.flags.contains(pfCpp)
@@ -928,11 +941,23 @@ proc parseBitfield(p: var Parser, i: PNode): PNode =
   else:
     result = i
 
+import compiler/nimlexbase
+
 proc parseStructBody(p: var Parser, stmtList: PNode,
                      kind: TNodeKind = nkRecList): PNode =
   result = newNodeP(kind, p)
-  eat(p, pxCurlyLe, result)
+  let com = newNodeP(nkCommentStmt, p)
+  eat(p, pxCurlyLe, com)
+  if com.comment.len() > 0:
+    addSon(result, com)
   while p.tok.xkind notin {pxEof, pxCurlyRi}:
+    let ln = p.parLineInfo().line
+    if p.tok.xkind in {pxLineComment, pxStarComment}:
+      let com = newNodeP(nkCommentStmt, p)
+      com.info.line = p.tok.lineNumber.uint16
+      addSon(result, com)
+      skipComAux(p, com)
+      continue
     discard skipConst(p)
     var baseTyp: PNode
     if p.tok.xkind == pxSymbol and p.tok.s in ["struct", "union"]:
@@ -982,7 +1007,9 @@ proc parseStructBody(p: var Parser, stmtList: PNode,
       addSon(result, def)
       if p.tok.xkind != pxComma: break
       getTok(p, def)
-    eat(p, pxSemicolon, lastSon(result))
+
+    eat(p, pxSemicolon)
+
   eat(p, pxCurlyRi, result)
 
 proc enumPragmas(p: Parser, name: PNode; origName: string): PNode =
@@ -1135,6 +1162,10 @@ proc parseCallConv(p: var Parser, pragmas: PNode) =
       getTok(p, nil)
       eat(p, pxParLe, nil)
       while p.tok.xkind notin {pxEof, pxParRi}: getTok(p, nil)
+    of "__attribute__":
+      getTok(p, nil)
+      eat(p, pxParLe, nil)
+      while p.tok.xkind notin {pxEof, pxParRi}: getTok(p, nil)
     else: break
     getTok(p, nil)