Skip to content

Commit

Permalink
Merge pull request #528 from Privado-Inc/dev
Browse files: browse the repository at this point in the history
HTML parser pass integration, along with its adoption in WebFormsCollectionTagger, for release
  • Loading branch information
pandurangpatil authored Apr 27, 2023
2 parents 0ad7653 + 7a0908f commit b448690
Show file tree
Hide file tree
Showing 14 changed files with 822 additions and 43 deletions.
3 changes: 2 additions & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ libraryDependencies ++= Seq(
"org.apache.logging.log4j" % "log4j-slf4j2-impl" % "2.19.0" % Runtime,
"org.apache.poi" % "poi-ooxml" % "5.2.2",
"com.github.jsqlparser" % "jsqlparser" % "4.6",
"org.apache.maven" % "maven-model" % "3.9.0"
"org.apache.maven" % "maven-model" % "3.9.0",
"net.sourceforge.htmlunit" % "htmlunit" % "2.70.0"
)

ThisBuild / Compile / scalacOptions ++= Seq("-feature", "-deprecation", "-language:implicitConversions")
Expand Down
5 changes: 3 additions & 2 deletions schema/src/main/scala/CpgExtSchema.scala
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ class CpgExtSchema(builder: SchemaBuilder, cpgSchema: CpgSchema) {
import cpgSchema.base._
import cpgSchema.fs._
import cpgSchema.method._
import cpgSchema.typeSchema._
import cpgSchema.tagsAndLocation._

import cpgSchema.typeSchema._
import cpgSchema.hidden._
// Add node types, edge types, and properties here

val myProperty = builder
Expand Down Expand Up @@ -139,6 +139,7 @@ class CpgExtSchema(builder: SchemaBuilder, cpgSchema: CpgSchema) {
module.addOutEdge(edge = dependencies, inNode = dependency)
module.addOutEdge(edge = sourceFile, inNode = file)
dependency.addOutEdge(edge = sourceFile, inNode = file)
templateDOM.addOutEdge(edge = sourceFile, inNode = file)
}

object CpgExtSchema {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ import io.shiftleft.semanticcpg.language._
import io.shiftleft.semanticcpg.layers.LayerCreatorContext
import org.slf4j.LoggerFactory
import ai.privado.languageEngine.java.passes.module.DependenciesNodePass
import ai.privado.passes.SQLParser
import ai.privado.passes.{HTMLParserPass, SQLParser}

import java.util.Calendar
import scala.util.{Failure, Success, Try}
Expand All @@ -82,10 +82,15 @@ object JavaProcessor {
println(
s"${TimeMetric.getNewTime()} - Property file pass done in \t\t\t- ${TimeMetric.setNewTimeToLastAndGetTimeDiff()}"
)
println(s"${Calendar.getInstance().getTime} - HTML parser pass")
new HTMLParserPass(cpg, sourceRepoLocation, ruleCache).createAndApply()
println(
s"${TimeMetric.getNewTime()} - HTML parser pass done in \t\t\t- ${TimeMetric.setNewTimeToLastAndGetTimeDiff()}"
)
println(s"${Calendar.getInstance().getTime} - SQL parser pass")
new SQLParser(cpg, sourceRepoLocation, ruleCache).createAndApply()
println(
s"${TimeMetric.getNewTime()} - SQL parser pass done in \t\t\t- ${TimeMetric.setNewTimeToLastAndGetTimeDiff()}"
s"${TimeMetric.getNewTime()} - SQL parser pass done in \t\t\t\t- ${TimeMetric.setNewTimeToLastAndGetTimeDiff()}"
)
logger.info("Applying data flow overlay")
val context = new LayerCreatorContext(cpg)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,9 @@ import ai.privado.languageEngine.java.tagger.collection.{CollectionTagger, GrpcC
import ai.privado.languageEngine.java.tagger.sink.{InheritMethodTagger, JavaAPITagger}
import ai.privado.languageEngine.java.tagger.source.{IdentifierTagger, InSensitiveCallTagger}
import ai.privado.tagger.PrivadoBaseTagger
import ai.privado.tagger.collection.WebFormsCollectionTagger
import ai.privado.tagger.config.DBConfigTagger
import ai.privado.tagger.sink.{CustomInheritTagger, RegularSinkTagger}
import ai.privado.tagger.sink.RegularSinkTagger
import ai.privado.tagger.source.{LiteralTagger, SqlQueryTagger}
import io.shiftleft.codepropertygraph.generated.Cpg
import io.shiftleft.codepropertygraph.generated.nodes.Tag
Expand Down Expand Up @@ -72,7 +73,7 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger {
println(s"${Calendar.getInstance().getTime} - --InSensitive call tagger invoked...")
new InSensitiveCallTagger(cpg, ruleCache, taggerCache).createAndApply()
println(
s"${TimeMetric.getNewTime()} - --InSensitive call tagger is done in \t- ${TimeMetric.setNewTimeToStageLastAndGetTimeDiff()}"
s"${TimeMetric.getNewTime()} - --InSensitive call tagger is done in \t\t- ${TimeMetric.setNewTimeToStageLastAndGetTimeDiff()}"
)

println(s"${Calendar.getInstance().getTime} - --DBConfigTagger invoked...")
Expand Down Expand Up @@ -106,13 +107,13 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger {
println(s"${Calendar.getInstance().getTime} - --Database Query Read Pass invoked...")
new DatabaseQueryReadPass(cpg, ruleCache, taggerCache, EntityMapper.getClassTableMapping(cpg)).createAndApply()
println(
s"${TimeMetric.getNewTime()} - --Database Query Read Pass is done in \t\t\t- ${TimeMetric.setNewTimeToStageLastAndGetTimeDiff()}"
s"${TimeMetric.getNewTime()} - --Database Query Read Pass is done in \t\t- ${TimeMetric.setNewTimeToStageLastAndGetTimeDiff()}"
)

println(s"${Calendar.getInstance().getTime} - --Database Repository Read Pass invoked...")
new DatabaseRepositoryReadPass(cpg, taggerCache).createAndApply()
println(
s"${TimeMetric.getNewTime()} - --Database Repository Read Pass is done in \t\t\t- ${TimeMetric.setNewTimeToStageLastAndGetTimeDiff()}"
s"${TimeMetric.getNewTime()} - --Database Repository Read Pass is done in \t- ${TimeMetric.setNewTimeToStageLastAndGetTimeDiff()}"
)

println(s"${Calendar.getInstance().getTime} - --CollectionTagger invoked...")
Expand All @@ -124,15 +125,19 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger {
println(s"${Calendar.getInstance().getTime} - --SOAPCollectionTagger invoked...")
new SOAPCollectionTagger(cpg, ruleCache).createAndApply()
println(
s"${TimeMetric.getNewTime()} - --SOAPCollectionTagger is done in \t\t\t- ${TimeMetric.setNewTimeToStageLastAndGetTimeDiff()}"
s"${TimeMetric.getNewTime()} - --SOAPCollectionTagger is done in \t\t- ${TimeMetric.setNewTimeToStageLastAndGetTimeDiff()}"
)

println(s"${Calendar.getInstance().getTime} - --GrpcCollectionTagger invoked...")
new GrpcCollectionTagger(cpg, ruleCache).createAndApply()
println(
s"${TimeMetric.getNewTime()} - --GrpcCollectionTagger is done in \t\t\t- ${TimeMetric.setNewTimeToStageLastAndGetTimeDiff()}"
s"${TimeMetric.getNewTime()} - --GrpcCollectionTagger is done in \t\t- ${TimeMetric.setNewTimeToStageLastAndGetTimeDiff()}"
)
println(s"${Calendar.getInstance().getTime} - --WebFormsCollectionTagger invoked...")
new WebFormsCollectionTagger(cpg, ruleCache).createAndApply()
println(
s"${TimeMetric.getNewTime()} - --WebFormsCollectionTagger is done in \t\t- ${TimeMetric.setNewTimeToStageLastAndGetTimeDiff()}"
)

logger.info("Done with tagging")

cpg.tag
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,43 +25,33 @@ package ai.privado.languageEngine.javascript.processor

import ai.privado.audit.AuditReportEntryPoint
import ai.privado.cache.{AppCache, DataFlowCache, RuleCache}
import ai.privado.entrypoint.{ScanProcessor, TimeMetric}
import ai.privado.entrypoint.ScanProcessor.config
import ai.privado.entrypoint.{ScanProcessor, TimeMetric}
import ai.privado.exporter.{ExcelExporter, JSONExporter}
import ai.privado.languageEngine.java.cache.ModuleCache
import ai.privado.languageEngine.java.passes.config.ModuleFilePass
import ai.privado.languageEngine.java.passes.module.DependenciesNodePass
import ai.privado.languageEngine.javascript.passes.methodfullname.{
MethodFullName,
MethodFullNameForEmptyNodes,
MethodFullNameFromIdentifier
}
import ai.privado.languageEngine.javascript.semantic.Language._
import ai.privado.metric.MetricHandler
import ai.privado.model.{CatLevelOne, ConfigAndRules, Constants}
import ai.privado.model.Constants.{
cpgOutputFileName,
outputAuditFileName,
outputDirectoryName,
outputFileName,
outputIntermediateFileName
}
import ai.privado.model.Constants._
import ai.privado.model.{CatLevelOne, Constants, Language}
import ai.privado.passes.{HTMLParserPass, SQLParser}
import ai.privado.semantic.Language._
import ai.privado.utility.UnresolvedReportUtility
import ai.privado.model.Language
import ai.privado.passes.SQLParser
import ai.privado.utility.Utilities.createCpgFolder
import better.files.File
import io.joern.jssrc2cpg.{Config, JsSrc2Cpg}
import io.shiftleft.codepropertygraph
import org.slf4j.LoggerFactory
import io.shiftleft.semanticcpg.language._
import better.files.File
import io.shiftleft.codepropertygraph.generated.Operators
import io.shiftleft.semanticcpg.language._
import org.slf4j.LoggerFactory

import java.util.Calendar
import scala.collection.mutable.ListBuffer
import scala.jdk.CollectionConverters.CollectionHasAsScala
import scala.util.{Failure, Success, Try}
import scala.collection.mutable.ListBuffer

object JavascriptProcessor {

Expand All @@ -74,6 +64,7 @@ object JavascriptProcessor {
): Either[String, Unit] = {
xtocpg match {
case Success(cpg) =>
new HTMLParserPass(cpg, sourceRepoLocation, ruleCache).createAndApply()
logger.info("Applying default overlays")
logger.info("Enhancing Javascript graph")
logger.debug("Running custom passes")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@
package ai.privado.languageEngine.javascript.tagger

import ai.privado.cache.RuleCache
import ai.privado.languageEngine.javascript.tagger.collection.CollectionTagger
import ai.privado.entrypoint.TimeMetric
import ai.privado.languageEngine.javascript.tagger.sink.RegularSinkTagger
import ai.privado.languageEngine.javascript.tagger.source.IdentifierTagger
import ai.privado.tagger.PrivadoBaseTagger
import ai.privado.languageEngine.javascript.tagger.sink.JSAPITagger
import ai.privado.tagger.collection.WebFormsCollectionTagger
import ai.privado.tagger.source.{LiteralTagger, SqlQueryTagger}
import io.shiftleft.codepropertygraph.generated.Cpg
import io.shiftleft.codepropertygraph.generated.nodes.Tag
Expand Down Expand Up @@ -61,8 +61,8 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger {
new RegularSinkTagger(cpg, ruleCache).createAndApply()
println(s"${Calendar.getInstance().getTime} - APITagger invoked...")
new JSAPITagger(cpg, ruleCache).createAndApply()
println(s"${Calendar.getInstance().getTime} - CollectionTagger invoked...")
new CollectionTagger(cpg, ruleCache).createAndApply()
println(s"${Calendar.getInstance().getTime} - WebFormsCollectionTagger invoked...")
new WebFormsCollectionTagger(cpg, ruleCache).createAndApply()
logger.info("Done with tagging")

cpg.tag
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ import io.joern.javasrc2cpg.Config
import java.util.Calendar
import scala.jdk.CollectionConverters.CollectionHasAsScala
import scala.util.{Failure, Success, Try}
import ai.privado.passes.SQLParser
import ai.privado.passes.{HTMLParserPass, SQLParser}
import io.joern.x2cpg.passes.base.AstLinkerPass
import io.shiftleft.codepropertygraph.generated.nodes.AstNode

Expand Down Expand Up @@ -77,7 +77,11 @@ object PythonProcessor {
println(
s"${TimeMetric.getNewTime()} - Run InheritanceFullNamePass done in \t\t\t- ${TimeMetric.setNewTimeToLastAndGetTimeDiff()}"
)

println(s"${Calendar.getInstance().getTime} - HTML parser pass")
new HTMLParserPass(cpg, sourceRepoLocation, ruleCache).createAndApply()
println(
s"${TimeMetric.getNewTime()} - HTML parser pass done in \t\t\t- ${TimeMetric.setNewTimeToLastAndGetTimeDiff()}"
)
new PythonTypeRecoveryPass(cpg).createAndApply()
println(
s"${TimeMetric.getNewTime()} - Run PythonTypeRecovery done in \t\t\t- ${TimeMetric.setNewTimeToLastAndGetTimeDiff()}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import ai.privado.languageEngine.python.tagger.collection.CollectionTagger
import ai.privado.languageEngine.python.tagger.sink.{InheritMethodTagger, PythonAPITagger}
import ai.privado.languageEngine.python.tagger.source.{IdentifierTagger, LiteralTagger}
import ai.privado.tagger.PrivadoBaseTagger
import ai.privado.tagger.collection.WebFormsCollectionTagger
import ai.privado.tagger.config.PythonDBConfigTagger
import ai.privado.tagger.sink.{CustomInheritTagger, LogShareSinkTagger, RegularSinkTagger}
import ai.privado.tagger.source.SqlQueryTagger
Expand Down Expand Up @@ -88,6 +89,11 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger {
println(
s"${TimeMetric.getNewTime()} - --Database Read Pass is done in \t\t\t- ${TimeMetric.setNewTimeToStageLastAndGetTimeDiff()}"
)
println(s"${Calendar.getInstance().getTime} - --WebFormsCollectionTagger invoked...")
new WebFormsCollectionTagger(cpg, ruleCache).createAndApply()
println(
s"${TimeMetric.getNewTime()} - --WebFormsCollectionTagger is done in \t\t- ${TimeMetric.setNewTimeToStageLastAndGetTimeDiff()}"
)
logger.info("Done with tagging")
cpg.tag
}
Expand Down
5 changes: 5 additions & 0 deletions src/main/scala/ai/privado/model/Constants.scala
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,11 @@ object Constants {
val jsxOpenElement = "JSXOpeningElement"
val jsxElement = "JSXElement"

val HTMLElement = "HTMLElement"
val HTMLOpenElement = "HTMLOpenElement"
val HTMLClosingElement = "HTMLClosingElement"
val HTMLElementAttribute = "HTMLElementAttribute"

val annotations = "annotations"
val default = "default"
}
Loading

0 comments on commit b448690

Please sign in to comment.