Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[c2cpg] Recognize more source file extensions #5173

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -110,12 +110,14 @@ trait AstCreatorHelper(implicit withSchemaValidation: ValidationMode) { this: As
fixedTypeName
}

protected def registerMethodDeclaration(fullName: String, methodInfo: CGlobal.MethodInfo): Unit = {
global.methodDeclarations.putIfAbsent(fullName, methodInfo)
protected def registerMethodDeclaration(fullName: String, signature: String, methodInfo: CGlobal.MethodInfo): Unit = {
val methodKey = if (fullName.endsWith(signature)) fullName else s"$fullName:$signature"
global.methodDeclarations.putIfAbsent(methodKey, methodInfo)
}

protected def registerMethodDefinition(fullName: String): Unit = {
global.methodDefinitions.putIfAbsent(fullName, true)
protected def registerMethodDefinition(fullName: String, signature: String): Unit = {
val methodKey = if (fullName.endsWith(signature)) fullName else s"$fullName:$signature"
global.methodDefinitions.putIfAbsent(methodKey, true)
}

// Sadly, there is no predefined List / Enum of this within Eclipse CDT:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ trait AstForFunctionsCreator(implicit withSchemaValidation: ValidationMode) { th

val methodInfo = CGlobal.MethodInfo(
name,
fullName,
code = codeString,
fileName = filename,
returnType = registerType(returnType),
Expand All @@ -163,7 +164,7 @@ trait AstForFunctionsCreator(implicit withSchemaValidation: ValidationMode) { th
parameter = parameterNodeInfos,
modifier = modifierFor(funcDecl).map(_.modifierType)
)
registerMethodDeclaration(fullName, methodInfo)
registerMethodDeclaration(fullName, signature, methodInfo)
Ast()
case cVariable: CVariable =>
val name = shortName(funcDecl)
Expand Down Expand Up @@ -246,7 +247,7 @@ trait AstForFunctionsCreator(implicit withSchemaValidation: ValidationMode) { th
protected def astForFunctionDefinition(funcDef: IASTFunctionDefinition): Ast = {
val filename = fileName(funcDef)
val MethodFullNameInfo(name, fullName, signature, returnType) = this.methodFullNameInfo(funcDef)
registerMethodDefinition(fullName)
registerMethodDefinition(fullName, signature)

val codeString = code(funcDef)
val methodNode_ = methodNode(funcDef, name, codeString, fullName, Some(signature), filename)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ object CGlobal {

final case class MethodInfo(
name: String,
fullName: String,
code: String,
fileName: String,
returnType: String,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import org.eclipse.cdt.core.parser.DefaultLogService
import org.eclipse.cdt.core.parser.FileContent
import org.eclipse.cdt.core.parser.ScannerInfo
import org.eclipse.cdt.internal.core.dom.parser.cpp.semantics.CPPVisitor
import org.eclipse.cdt.internal.core.parser.scanner.InternalFileContent
import org.slf4j.LoggerFactory

import java.nio.file.NoSuchFileException
Expand All @@ -35,9 +34,10 @@ object CdtParser {
failure: Option[Throwable] = None
)

private def readFileAsFileContent(path: Path): InternalFileContent = {
val lines = IOUtils.readLinesInFile(path).mkString("\n").toArray
FileContent.create(path.toString, true, lines).asInstanceOf[InternalFileContent]
private def readFileAsFileContent(file: File, lines: Option[Array[Char]] = None): FileContent = {
val codeLines = lines.getOrElse(IOUtils.readLinesInFile(file.path).mkString("\n").toArray)
val isSource = FileDefaults.hasSourceFileExtension(file.pathAsString)
FileContent.create(file.pathAsString, isSource, codeLines)
}

}
Expand All @@ -62,17 +62,17 @@ class CdtParser(config: Config, compilationDatabase: mutable.LinkedHashSet[Comma
if (config.noImageLocations) opts |= ILanguage.OPTION_NO_IMAGE_LOCATIONS

private def preprocessedFileIsFromCPPFile(file: Path, code: String): Boolean = {
if (config.withPreprocessedFiles && file.toString.endsWith(FileDefaults.PREPROCESSED_EXT)) {
val fileWithoutExt = file.toString.stripSuffix(FileDefaults.PREPROCESSED_EXT)
val filesWithCPPExt = FileDefaults.CPP_FILE_EXTENSIONS.map(ext => File(s"$fileWithoutExt$ext").name)
if (config.withPreprocessedFiles && FileDefaults.hasPreprocessedFileExtension(file.toString)) {
val fileWithoutExt = file.toString.substring(0, file.toString.lastIndexOf("."))
val filesWithCPPExt = FileDefaults.CppFileExtensions.map(ext => File(s"$fileWithoutExt$ext").name)
code.linesIterator.exists(line => filesWithCPPExt.exists(f => line.contains(s"\"$f\"")))
} else {
false
}
}

private def createParseLanguage(file: Path, code: String): ILanguage = {
if (FileDefaults.isCPPFile(file.toString) || preprocessedFileIsFromCPPFile(file, code)) {
if (FileDefaults.hasCppFileExtension(file.toString) || preprocessedFileIsFromCPPFile(file, code)) {
GPPLanguage.getDefault
} else {
GCCLanguage.getDefault
Expand All @@ -81,7 +81,7 @@ class CdtParser(config: Config, compilationDatabase: mutable.LinkedHashSet[Comma

private def createScannerInfo(file: Path): ScannerInfo = {
val additionalIncludes =
if (FileDefaults.isCPPFile(file.toString)) parserConfig.systemIncludePathsCPP
if (FileDefaults.hasCppFileExtension(file.toString)) parserConfig.systemIncludePathsCPP
else parserConfig.systemIncludePathsC
val fileSpecificDefines = parserConfig.definedSymbolsPerFile.getOrElse(file.toString, Map.empty)
val fileSpecificIncludes = parserConfig.includesPerFile.getOrElse(file.toString, mutable.LinkedHashSet.empty)
Expand All @@ -103,14 +103,13 @@ class CdtParser(config: Config, compilationDatabase: mutable.LinkedHashSet[Comma
translationUnit
}

private def parseInternal(file: Path): ParseResult = {
val realPath = File(file)
if (realPath.isRegularFile) { // handling potentially broken symlinks
private def parseInternal(file: File): ParseResult = {
if (file.isRegularFile) { // handling potentially broken symlinks
try {
val fileContent = readFileAsFileContent(realPath.path)
val fileContent = readFileAsFileContent(file.path)
val fileContentProvider = new CustomFileContentProvider(headerFileFinder)
val lang = createParseLanguage(realPath.path, fileContent.toString)
val scannerInfo = createScannerInfo(realPath.path)
val lang = createParseLanguage(file.path, fileContent.toString)
val scannerInfo = createScannerInfo(file.path)
val translationUnit = lang.getASTTranslationUnit(fileContent, scannerInfo, fileContentProvider, null, opts, log)
val problems = CPPVisitor.getProblems(translationUnit)
if (parserConfig.logProblems) logProblems(problems.toList)
Expand All @@ -131,7 +130,8 @@ class CdtParser(config: Config, compilationDatabase: mutable.LinkedHashSet[Comma
} else {
ParseResult(
None,
failure = Option(new NoSuchFileException(s"File '$realPath' does not exist. Check for broken symlinks!"))
failure =
Option(new NoSuchFileException(s"File '${file.pathAsString}' does not exist. Check for broken symlinks!"))
)
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,23 +1,31 @@
package io.joern.c2cpg.parser

import org.apache.commons.lang3.StringUtils

object FileDefaults {

val C_EXT: String = ".c"
val CPP_EXT: String = ".cpp"
val CPP_CXX_EXT: String = ".cxx"
val PREPROCESSED_EXT: String = ".i"
val CExt: String = ".c"
val CppExt: String = ".cpp"
val PreprocessedExt: String = ".i"

val HeaderFileExtensions: Set[String] =
Set(".h", ".hpp", ".hh", ".hp", ".hxx", ".h++", ".tcc")

val CppSourceFileExtensions: Set[String] =
Set(".cc", ".cxx", ".cpp", ".cp", ".ccm", ".cxxm", ".c++m")

private val CC_EXT = ".cc"
private val C_HEADER_EXT = ".h"
private val CPP_HEADER_EXT = ".hpp"
private val OTHER_HEADER_EXT = ".hh"
val CppFileExtensions: Set[String] =
CppSourceFileExtensions ++ HeaderFileExtensions

val SOURCE_FILE_EXTENSIONS: Set[String] = Set(C_EXT, CC_EXT, CPP_EXT, CPP_CXX_EXT)
val SourceFileExtensions: Set[String] =
CppSourceFileExtensions ++ Set(CExt)

val HEADER_FILE_EXTENSIONS: Set[String] = Set(C_HEADER_EXT, CPP_HEADER_EXT, OTHER_HEADER_EXT)
def hasCppFileExtension(filePath: String): Boolean =
CppFileExtensions.exists(ext => StringUtils.endsWithIgnoreCase(filePath, ext))

val CPP_FILE_EXTENSIONS: Set[String] = Set(CC_EXT, CPP_EXT, CPP_CXX_EXT, CPP_HEADER_EXT)
def hasSourceFileExtension(filePath: String): Boolean =
SourceFileExtensions.exists(ext => StringUtils.endsWithIgnoreCase(filePath, ext))

def isCPPFile(filePath: String): Boolean =
CPP_FILE_EXTENSIONS.exists(filePath.endsWith)
def hasPreprocessedFileExtension(filePath: String): Boolean =
StringUtils.endsWithIgnoreCase(filePath, PreprocessedExt)
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package io.joern.c2cpg.parser

import better.files.*
import io.joern.c2cpg.C2Cpg.DefaultIgnoredFolders
import io.joern.x2cpg.SourceFiles
import org.jline.utils.Levenshtein

Expand All @@ -9,7 +10,7 @@ import java.nio.file.Path
class HeaderFileFinder(root: String) {

private val nameToPathMap: Map[String, List[Path]] = SourceFiles
.determine(root, FileDefaults.HEADER_FILE_EXTENSIONS)
.determine(root, FileDefaults.HeaderFileExtensions, ignoredDefaultRegex = Option(DefaultIgnoredFolders))
.map { p =>
val file = File(p)
(file.name, file.path)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@ class AstCreationPass(cpg: Cpg, config: Config, report: Report = new Report())
}

private def sourceFilesFromDirectory(): Array[String] = {
val sourceFileExtensions = FileDefaults.SOURCE_FILE_EXTENSIONS
++ FileDefaults.HEADER_FILE_EXTENSIONS
++ Option.when(config.withPreprocessedFiles)(FileDefaults.PREPROCESSED_EXT).toList
val sourceFileExtensions = FileDefaults.SourceFileExtensions
++ FileDefaults.HeaderFileExtensions
++ Option.when(config.withPreprocessedFiles)(FileDefaults.PreprocessedExt).toList
val allSourceFiles = SourceFiles
.determine(
config.inputPath,
Expand All @@ -59,8 +59,8 @@ class AstCreationPass(cpg: Cpg, config: Config, report: Report = new Report())
.toArray
if (config.withPreprocessedFiles) {
allSourceFiles.filter {
case f if !f.endsWith(FileDefaults.PREPROCESSED_EXT) =>
val fAsPreprocessedFile = s"${f.substring(0, f.lastIndexOf("."))}${FileDefaults.PREPROCESSED_EXT}"
case f if !FileDefaults.hasPreprocessedFileExtension(f) =>
val fAsPreprocessedFile = s"${f.substring(0, f.lastIndexOf("."))}${FileDefaults.PreprocessedExt}"
!allSourceFiles.exists { sourceFile => f != sourceFile && sourceFile == fAsPreprocessedFile }
case _ => true
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,8 @@ class FunctionDeclNodePass(cpg: Cpg, methodDeclarations: Map[String, CGlobal.Met
}

override def run(dstGraph: DiffGraphBuilder): Unit = {
methodDeclarations.foreach { case (fullName, methodNodeInfo) =>
val methodNode_ = methodNode(fullName, methodNodeInfo)
methodDeclarations.foreach { case (_, methodNodeInfo) =>
val methodNode_ = methodNode(methodNodeInfo.fullName, methodNodeInfo)
val parameterNodes = methodNodeInfo.parameter.map(p => Ast(parameterInNode(p)))
val stubAst =
methodStubAst(
Expand All @@ -162,7 +162,7 @@ class FunctionDeclNodePass(cpg: Cpg, methodDeclarations: Map[String, CGlobal.Met
methodNodeInfo,
methodNode_,
methodNodeInfo.name,
fullName,
methodNodeInfo.fullName,
methodNodeInfo.signature,
dstGraph
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class PreprocessorPass(config: Config) {
SourceFiles
.determine(
config.inputPath,
FileDefaults.SOURCE_FILE_EXTENSIONS,
FileDefaults.SourceFileExtensions,
ignoredDefaultRegex = Option(DefaultIgnoredFolders),
ignoredFilesRegex = Option(config.ignoredFilesRegex),
ignoredFilesPath = Option(config.ignoredFiles)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ object FileHandlingTests {

class FileHandlingTests
extends Code2CpgFixture(() =>
new CDefaultTestCpg(FileDefaults.C_EXT) {
new CDefaultTestCpg(FileDefaults.CExt) {
override def codeFilePreProcessing(codeFile: Path): Unit = {
if (codeFile.toString.endsWith(FileHandlingTests.brokenLinkedFile)) {
File(codeFile).delete().symbolicLinkTo(File("does/not/exist.c"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import io.joern.c2cpg.testfixtures.C2CpgSuite
import io.shiftleft.codepropertygraph.generated.ControlStructureTypes
import io.shiftleft.semanticcpg.language.*

class ControlStructureTests extends C2CpgSuite(FileDefaults.CPP_EXT) {
class ControlStructureTests extends C2CpgSuite(FileDefaults.CppExt) {

"ControlStructureTest1" should {
val cpg = code("""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ class HeaderAstCreationPassTests extends C2CpgSuite {
case Seq(bar, foo, m, printf) =>
// note that we don't see bar twice even though it is contained
// in main.h and included in main.c and we do scan both
bar.fullName shouldBe "bar"
bar.fullName shouldBe "bar:void()"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like it would be a regression for C code, in order to improve C++ code. I guess there just is inherent ambiguity with .h files, whether they contain C or C++ code. But I don't think we can get away with this right now.

Maybe we do header files in a second pass after the regular files, so we can know whether they got included from C or C++ files? Or maybe CDT has some guess-the-file-type magic since the IDE runs into the same problem?

Copy link
Contributor Author

@max-leuthaeuser max-leuthaeuser Dec 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or maybe CDT has some guess-the-file-type magic since the IDE runs into the same problem?

Sadly no, they also simply use the C++ parser in all cases.

The two-pass approach also won't work in all cases, as one could, e.g., include a C header file in both a C and a C++ source file.

Copy link
Contributor Author

@max-leuthaeuser max-leuthaeuser Dec 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or to phrase it differently:
The behaviour without this PR is definitely wrong as it makes parsing CPP code in .h files impossible.
With this PR we are able to parse such code. "Wrong" fullnames for C method declarations that are never implemented in any source file (because there we will create the correct fullname and de-duplicate correctly) should be no issue, or am I missing something there?

bar.filename shouldBe "main.h"
foo.fullName shouldBe "foo"
foo.fullName shouldBe "foo:int()"
foo.filename shouldBe "other.h"
// main is also deduplicated. It is defined within the header file,
// and has an actual implementation in the source file
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -557,7 +557,7 @@ class CfgCreationPassTests extends CfgTestFixture(() => new CCfgTestCpg) {
}
}

class CppCfgCreationPassTests extends CfgTestFixture(() => new CCfgTestCpg(FileDefaults.CPP_EXT)) {
class CppCfgCreationPassTests extends CfgTestFixture(() => new CCfgTestCpg(FileDefaults.CppExt)) {
override def code(code: String): CCfgTestCpg = {
super.code(s"RET func() { $code }")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import io.joern.c2cpg.testfixtures.C2CpgSuite
import io.shiftleft.semanticcpg.language.*
import io.shiftleft.semanticcpg.language.types.structure.NamespaceTraversal

class ClassTypeTests extends C2CpgSuite(FileDefaults.CPP_EXT) {
class ClassTypeTests extends C2CpgSuite(FileDefaults.CppExt) {

"handling C++ classes (code example 1)" should {
val cpg = code("""
Expand Down Expand Up @@ -79,7 +79,7 @@ class ClassTypeTests extends C2CpgSuite(FileDefaults.CPP_EXT) {
x.aliasTypeFullName shouldBe None
x.order shouldBe 1
x.filename shouldBe "Test0.cpp"
x.filename.endsWith(FileDefaults.CPP_EXT) shouldBe true
x.filename.endsWith(FileDefaults.CppExt) shouldBe true
}

"should contain type decl for alias `mytype` of `int`" in {
Expand All @@ -91,7 +91,7 @@ class ClassTypeTests extends C2CpgSuite(FileDefaults.CPP_EXT) {
x.code shouldBe "typedef int mytype;"
x.order shouldBe 2
x.filename shouldBe "Test0.cpp"
x.filename.endsWith(FileDefaults.CPP_EXT) shouldBe true
x.filename.endsWith(FileDefaults.CppExt) shouldBe true
}

"should contain type decl for external type `int`" in {
Expand Down Expand Up @@ -126,7 +126,7 @@ class ClassTypeTests extends C2CpgSuite(FileDefaults.CPP_EXT) {
}

"should allow traversing from type to enclosing file" in {
cpg.typeDecl.file.filter(_.name.endsWith(FileDefaults.CPP_EXT)).l should not be empty
cpg.typeDecl.file.filter(_.name.endsWith(FileDefaults.CppExt)).l should not be empty
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import io.shiftleft.codepropertygraph.generated.nodes.Identifier
import io.shiftleft.semanticcpg.language.*
import io.shiftleft.semanticcpg.language.types.structure.NamespaceTraversal

class EnumTypeTests extends C2CpgSuite(fileSuffix = FileDefaults.CPP_EXT) {
class EnumTypeTests extends C2CpgSuite(fileSuffix = FileDefaults.CppExt) {

"Enums" should {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import io.shiftleft.codepropertygraph.generated.nodes.Identifier
import io.shiftleft.semanticcpg.language.*
import io.shiftleft.semanticcpg.language.types.structure.NamespaceTraversal

class NamespaceTypeTests extends C2CpgSuite(fileSuffix = FileDefaults.CPP_EXT) {
class NamespaceTypeTests extends C2CpgSuite(fileSuffix = FileDefaults.CppExt) {

"Namespaces" should {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import io.joern.c2cpg.testfixtures.C2CpgSuite
import io.shiftleft.semanticcpg.language.*
import io.shiftleft.semanticcpg.language.types.structure.NamespaceTraversal

class TemplateTypeTests extends C2CpgSuite(fileSuffix = FileDefaults.CPP_EXT) {
class TemplateTypeTests extends C2CpgSuite(fileSuffix = FileDefaults.CppExt) {

"Templates" should {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ package io.joern.c2cpg.testfixtures
import io.joern.c2cpg.parser.FileDefaults
import io.joern.x2cpg.testfixtures.Code2CpgFixture

class AstC2CpgSuite(fileSuffix: String = FileDefaults.C_EXT) extends Code2CpgFixture(() => new CAstTestCpg(fileSuffix))
class AstC2CpgSuite(fileSuffix: String = FileDefaults.CExt) extends Code2CpgFixture(() => new CAstTestCpg(fileSuffix))
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import io.joern.dataflowengineoss.semanticsloader.{FlowSemantic, Semantics}
import io.joern.x2cpg.testfixtures.Code2CpgFixture

class C2CpgSuite(
fileSuffix: String = FileDefaults.C_EXT,
fileSuffix: String = FileDefaults.CExt,
withOssDataflow: Boolean = false,
semantics: Semantics = DefaultSemantics(),
withPostProcessing: Boolean = false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ package io.joern.c2cpg.testfixtures
import io.joern.c2cpg.parser.FileDefaults
import io.joern.x2cpg.testfixtures.CfgTestCpg

class CCfgTestCpg(override val fileSuffix: String = FileDefaults.C_EXT) extends CfgTestCpg with C2CpgFrontend {}
class CCfgTestCpg(override val fileSuffix: String = FileDefaults.CExt) extends CfgTestCpg with C2CpgFrontend {}
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import io.joern.x2cpg.testfixtures.TestCpg
import io.shiftleft.semanticcpg.layers.LayerCreatorContext

class DataFlowTestCpg extends TestCpg with C2CpgFrontend {
override val fileSuffix: String = FileDefaults.C_EXT
override val fileSuffix: String = FileDefaults.CExt

override def applyPasses(): Unit = {
X2Cpg.applyDefaultOverlays(this)
Expand Down
Loading