Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,17 @@ QLStat provides a comprehensive framework for large-scale empirical analysis of
- Install [CodeQL CLI](https://docs.github.com/en/code-security/how-tos/scan-code-for-vulnerabilities/scan-from-the-command-line/setting-up-the-codeql-cli#1-download-the-codeql-cli-tar-archive) and add codeql to your PATH.
- Install [Go](https://go.dev/dl/).

## Demo

- [`demo.sh`](./demo.sh): A demo script to run QLStat on a sample configuration file.
- Results are written to `./codeqlResult/escape_ext/heapvar_should_move`, where you will find optimization variables allocated on the heap.
- [`demo.yaml`](./demo.yaml): The sample configuration file demonstrating the usage of QLStat.

## Usage

### 1. Configuration

Create your `stat.yaml` config file according to [`example.yaml`](./example.yaml) or YAML files in [`yaml-examples/`](./yaml-examples/). The configuration supports several key sections:
Create your `stat.yaml` config file according to [`example.yaml`](./example.yaml), [`demo.yaml`](./demo.yaml) or YAML files in [`yaml-examples/`](./yaml-examples/). The configuration supports several key sections:

- `sources`: Define repository sources with prefixes and specific repositories
- `language`: Specify the programming language for analysis (e.g., go)
Expand Down
34 changes: 20 additions & 14 deletions check.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,34 +4,40 @@ function splitline {
echo "-----------------"
}

echo "Batch Clone Build Test"
if ! go test -v ./cmd/batch_clone_build; then
echo "Batch Clone Build Test Failed"
exit 1
fi
# echo "Batch Clone Build Test"
# if ! go test -v ./cmd/batch_clone_build; then
# echo "Batch Clone Build Test Failed"
# exit 1
# fi

splitline
echo "Escape Adapter Test"
if ! go test -v ./cmd/escape_adapter; then
echo "Escape Adapter Test Failed"
exit 1
fi
# splitline
# echo "Escape Adapter Test"
# if ! go test -v ./cmd/escape_adapter; then
# echo "Escape Adapter Test Failed"
# exit 1
# fi

splitline
echo "External Verify Test"

go run ./cmd/batch_clone_build -noclone yaml-examples/malloc_test.yaml
go run ./cmd/batch_clone_build yaml-examples/malloc_test.yaml
# check number of lines in pprof ext csv file is equal to count of CodeQL class instances
if ! go run ./cmd/pprof-external-verify codeql-db/test/malloc_test/ instance_count ; then
echo "External Verify Test for malloc_test Failed"
exit 1
fi

go run ./cmd/codeql_qdriver -collect yaml-examples/malloc_test.yaml
# check mallocgc time is mapped to CodeQL class instances
if [ "$(grep -c . codeqlResult/pprof_ext/malloc_time/malloc_test.csv)" -ne 2 ]; then
MALLOC_CSV="codeqlResult/pprof_ext/malloc_time/malloc_test.csv"
if [ ! -f "$MALLOC_CSV" ]; then
echo "External Verify Test for mallocgc time mapping Failed: $MALLOC_CSV does not exist. Check if the query ran correctly."
exit 1
fi
if [ "$(grep -c . "$MALLOC_CSV")" -ne 2 ]; then
echo "External Verify Test for mallocgc time mapping Failed"
echo "The content of mallocgc time csv file is:"
cat codeqlResult/pprof_ext/malloc_time/malloc_test.csv
cat "$MALLOC_CSV"
exit 1
fi

Expand Down
17 changes: 16 additions & 1 deletion cmd/batch_clone_build/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,9 +165,24 @@ func build(cfg *config.Artifact, repo config.Repo, resChan chan CreateDBResult,
"database", "create", dbPath, "-l=" + cfg.Lang, "--overwrite", "-s=" + repo.DirPath(cfg.RepoRoot),
}
if buildcommand != "default" {
args = append(args, "-c", buildcommand)
if _, err := os.Stat(buildcommand); err != nil {
// buildcommand does not exist
args = append(args, "-c", buildcommand)
} else {
// buildcommand script exists. Change to absolute path
buildcommand, err = filepath.Abs(buildcommand)
if err != nil {
log.Fatalf("Failed to get absolute path for buildcommand: %v", err)
}
args = append(args, "-c", buildcommand)
}
}
cmd := exec.CommandContext(ctx, "codeql", args...)
cmd.Env = append(os.Environ(),
genEnv([]envpair{
{REPO_DIR, abspath(repo.DirPath(cfg.RepoRoot))},
{PROJROOT, abspath(utils.ProjectRoot())},
})...)
cmd.Stdout = outFile
cmd.Stderr = errFile

Expand Down
20 changes: 20 additions & 0 deletions cmd/batch_clone_build/env.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package main

// Names of the environment variables passed to the commands that
// batch_clone_build launches. Each constant's value is the variable name itself.
const (
REPO_DIR = "REPO_DIR" // the root directory of the repository
OUTPUT_DIR = "OUTPUT_DIR" // the directory to store intermediate results for generating external predicate
PROJROOT = "PROJROOT" // the root directory of the project
DB_EXT_DIR = "DB_EXT_DIR" // the directory to store external predicate database
)

// envpair is one environment variable: the variable's name and the value
// to assign to it.
type envpair struct {
name, value string
}

// genEnv renders every envpair as a "name=value" entry, keeping the input
// order, so the result can be appended to an environment list.
// The result is nil when pairs is empty.
func genEnv(pairs []envpair) (res []string) {
	for i := range pairs {
		entry := pairs[i].name + "=" + pairs[i].value
		res = append(res, entry)
	}
	return res
}
28 changes: 7 additions & 21 deletions cmd/batch_clone_build/extgen.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,26 +97,6 @@ func abspath(path string) string {
return p
}

// genScriptEnv builds the "NAME=value" environment entries for the script.
// Every path is converted with abspath.
//
//   - REPO_DIR: the root directory of the repository
//   - OUTPUT_DIR: the directory to store intermediate results for generating
//     external predicate
//   - PROJROOT: the root directory of the project
//   - DB_EXT_DIR: the directory to store external predicate database
func genScriptEnv(cfg *config.Artifact, repo config.Repo) []string {
	repoDir := abspath(repo.DirPath(cfg.RepoRoot))
	outputDir := abspath(repo.DirPath(extgenLogDir(cfg)))
	projRoot := abspath(utils.ProjectRoot())
	dbExtDir := abspath(repo.DBExtDir(cfg.DBRoot))

	env := make([]string, 0, 4)
	env = append(env, "REPO_DIR="+repoDir)
	env = append(env, "OUTPUT_DIR="+outputDir)
	env = append(env, "PROJROOT="+projRoot)
	env = append(env, "DB_EXT_DIR="+dbExtDir)
	return env
}

func genscript(cfg *config.Artifact, repo config.Repo, script string) {
outFile, errFile := utils.CreateOutAndErr(filepath.Join(repo.DirPath(extgenLogDir(cfg)), "runscript"))
defer outFile.Close()
Expand All @@ -128,7 +108,13 @@ func genscript(cfg *config.Artifact, repo config.Repo, script string) {
} else {
cmd = exec.Command(elems[0], elems[1:]...)
}
cmd.Env = append(os.Environ(), genScriptEnv(cfg, repo)...)

cmd.Env = append(os.Environ(), genEnv([]envpair{
{REPO_DIR, abspath(repo.DirPath(cfg.RepoRoot))},
{OUTPUT_DIR, abspath(repo.DirPath(extgenLogDir(cfg)))},
{PROJROOT, abspath(utils.ProjectRoot())},
{DB_EXT_DIR, abspath(repo.DBExtDir(cfg.DBRoot))},
})...)
cmd.Stdout, cmd.Stderr = outFile, errFile
fmt.Printf("cwd: %s, out: %s, err: %s, cmd: %s\n", cmd.Dir, outFile.Name(), errFile.Name(), cmd.String())
_ = cmd.Run()
Expand Down
20 changes: 16 additions & 4 deletions demo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,38 @@ sources:
- Lslightly/kitex-examples heapvar_should_move

# build database
# buildCmd will be executed in the root directory of repositories.
# "default" buildCmd lets codeql figure out the build command.
language: go # language to analyze
buildTimeout: 3600 # timeout for building repository
buildGrps:
- buildRepos: # "*" means all repositories. "-" means repositories defined in "sources" attribute. You can also specify fullname of repositories to force re-build. Note that repositories with same fullname in different source will be re-built also.
- rclone/rclone
- Lslightly/dolt
# buildCmd will be executed in the root directory of buildRepos. The behavior is decided by codeql. If a custom build script is specified, then the absolute path will be used.
#
# There are 3 types of buildCmd:
# 1. default: buildCmd lets codeql figure out the build command.
# 2. custom script path: relative path from the project root to the build script.
# 3. build command: the build command to execute in the root directory of buildRepos.
#
# If you use custom script, then 2 environment variables will be set:
# - REPO_DIR: the root directory of the repository (often used in the build phase)
# - PROJROOT: the root directory of the project
buildCmd: default
- buildRepos:
- Lslightly/kitex-examples
buildCmd: ./build.sh
buildCmd: yaml-examples/build/kitex-examples.sh
- buildRepos:
- cloudwego/kitex
buildCmd: go build -a ./...


# generate external predicates
# For repositories in each group, same genScript will be applied in the root directory of repositories
# "goescape" means `go build -a -gcflags=-m=2 ./...`. The stderr will be redirected to $logRoot/path/to/repo/m2.log. Then escape_adapter is used to generate databases. The external predicate database is generated in $dbRoot/path/to/repo/ext/$external.csv.
# There are 2 types of genScript:
#
# 1. goescape: it means `go build -a -gcflags=-m=2 ./...`. The stderr will be redirected to $logRoot/path/to/repo/m2.log. Then escape_adapter is used to generate databases. The external predicate database is generated in $dbRoot/path/to/repo/ext/$external.csv.
# 2. custom script path: relative path from the project root to the genScript script.
#
# If you use custom genScript, then 4 environment variables will be set:
# - PROJROOT: the root directory of the project
# - REPO_DIR: the root directory of repository
Expand Down
23 changes: 20 additions & 3 deletions example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,35 @@ sources:
- false-sharing

# build database
# buildCmd will be executed in the root directory of repositories.
# "default" buildCmd lets codeql figure out the build command.
language: go # language to analyze
buildTimeout: 3600 # timeout for building repository
buildGrps:
- buildRepos: # "*" means all repositories. "-" means repositories defined in "sources" attribute. You can also specify fullname of repositories to force re-build. Note that repositories with same fullname in different source will be re-built also.
- "-"
# buildCmd will be executed in the root directory of buildRepos. The behavior is decided by codeql. If a custom build script is specified, then the absolute path will be used.
#
# There are 3 types of buildCmd:
# 1. default: buildCmd lets codeql figure out the build command.
# 2. custom script path: relative path from the project root to the build script.
# 3. build command: the build command to execute in the root directory of buildRepos.
#
# If you use custom script, then 2 environment variables will be set:
# - REPO_DIR: the root directory of the repository (often used in the build phase)
# - PROJROOT: the root directory of the project
buildCmd: default

# generate external predicates
# For repositories in each group, same genScript will be applied in the root directory of repositories
# "goescape" means `go build -a -gcflags=-m=2 ./...`. The stderr will be redirected to $logRoot/path/to/repo/m2.log. Then escape_adapter is used to generate databases. The external predicate database is generated in $dbRoot/path/to/repo/ext/$external.csv.
# There are 2 types of genScript:
#
# 1. goescape: it means `go build -a -gcflags=-m=2 ./...`. The stderr will be redirected to $logRoot/path/to/repo/m2.log. Then escape_adapter is used to generate databases. The external predicate database is generated in $dbRoot/path/to/repo/ext/$external.csv.
# 2. custom script path: relative path from the project root to the genScript script.
#
# If you use custom genScript, then 4 environment variables will be set:
# - PROJROOT: the root directory of the project
# - REPO_DIR: the root directory of repository
# - OUTPUT_DIR: the directory to store intermediate results/log to generate external predicate database
# - DB_EXT_DIR: the directory to store external predicate database
externalGenGrps:
- genRepos:
- false-sharing
Expand Down
6 changes: 6 additions & 0 deletions yaml-examples/build/kitex-examples.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# Custom buildCmd script: builds the package in $REPO_DIR/hello and its
# ./client sub-package.
#
# REPO_DIR must be provided in the environment. Abort instead of running the
# builds in the wrong directory when it is unset/empty or the directory cannot
# be entered; the path is quoted so directories containing spaces work.
cd "${REPO_DIR:?REPO_DIR must be set}/hello" || exit 1
go build -a .
go build -a -o ./client-bin ./client


3 changes: 3 additions & 0 deletions yaml-examples/build/malloc_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash
# Custom buildCmd script: compiles the test binary of the package at the
# repository root without running it.
#
# REPO_DIR must be provided in the environment. An unquoted, unset REPO_DIR
# would turn the cd into a bare `cd` (jumping to $HOME), so fail fast when it
# is unset/empty or the directory cannot be entered.
cd "${REPO_DIR:?REPO_DIR must be set}" || exit 1
go test -c -a .
2 changes: 1 addition & 1 deletion yaml-examples/kitex.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ buildTimeout: 3600 # timeout for building repository
buildGrps:
- buildRepos: # "*" means all repositories. "-" means repositories defined in "sources" attribute. You can also specify fullname of repositories to force re-build. Note that repositories with same fullname in different source will be re-built also.
- "-"
buildCmd: ./build.sh
buildCmd: yaml-examples/build/kitex-examples.sh

# generate external predicates
# For repositories in each group, same genScript will be applied in the root directory of repositories
Expand Down
2 changes: 1 addition & 1 deletion yaml-examples/malloc_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ buildTimeout: 3600 # timeout for building repository
buildGrps:
- buildRepos: # "*" means all repositories. "-" means repositories defined in "sources" attribute. You can also specify fullname of repositories to force re-build. Note that repositories with same fullname in different source will be re-built also.
- malloc_test
buildCmd: go test -c -a .
buildCmd: yaml-examples/build/malloc_test.sh
- buildRepos:
- batchmalloc
buildCmd: default
Expand Down
Loading