77import json
88sys .path .append (str (Path (__file__ ).parent .parent ))
99
10- from src .config import CODEQL_DB_PATH , PROJECT_SOURCE_CODE_DIR , IRIS_ROOT_DIR , BUILD_INFO , DEP_CONFIGS , DATA_DIR
10+ from src .config import CODEQL_DB_PATH , PROJECT_SOURCE_CODE_DIR , IRIS_ROOT_DIR , BUILD_INFO , DEP_CONFIGS , DATA_DIR , CODEQL_DIR , CVES_MAPPED_W_COMMITS_DIR
11+ from scripts .docker_utils import ensure_image , create_container , exec_in_container , parse_project_image , copy_dir_to_container , copy_from_container
# Dependency/version configuration, parsed once at import time.
# Path.read_text() opens and closes the file itself, so no handle is left
# dangling the way a bare json.load(open(...)) would leave one.
ALLVERSIONS = json.loads(Path(DEP_CONFIGS).read_text())
1213
14+ # Path to custom build commands CSV
15+ BUILD_CMDS_CSV = os .path .join (DATA_DIR , "build_cmds.csv" )
16+
def load_custom_build_commands(csv_path: str = BUILD_CMDS_CSV) -> dict[str, str]:
    """Load custom build commands from the build command CSV file.

    Args:
        csv_path: Path to a CSV file with ``project_slug`` and ``build_cmd``
            columns.

    Returns:
        A mapping of project_slug -> build_cmd. Rows whose slug or command is
        missing or blank are skipped. A missing file, or a file without the
        expected headers, yields an empty mapping.
    """
    build_cmds: dict[str, str] = {}
    if not os.path.exists(csv_path):
        return build_cmds

    # "utf-8-sig" reads plain UTF-8 and additionally strips a leading BOM.
    # Without it, a BOM (common in spreadsheet exports) becomes part of the
    # first header name, so "project_slug" is never found and every row is
    # silently dropped. It also avoids depending on the platform's default
    # encoding.
    with open(csv_path, "r", newline="", encoding="utf-8-sig") as f:
        for row in csv.DictReader(f):
            # Short rows yield None for missing columns; normalise to "".
            slug = (row.get("project_slug") or "").strip()
            cmd = (row.get("build_cmd") or "").strip()
            if slug and cmd:
                build_cmds[slug] = cmd
    return build_cmds
33+
34+ # Custom build commands for the CodeQL database creation (loaded from CSV)
35+ CUSTOM_BUILD_COMMANDS : dict [str , str ] = load_custom_build_commands ()
36+
1337def setup_environment (row ):
1438 env = os .environ .copy ()
1539
@@ -48,6 +72,9 @@ def create_codeql_database(project_slug, env, db_base_path, sources_base_path):
4872 print (f"PATH: { env .get ('PATH' , 'Not set' )} " )
4973 print (f"JAVA_HOME: { env .get ('JAVA_HOME' , 'Not set' )} " )
5074
75+ # Prefer custom build command when available
76+ custom_cmd = CUSTOM_BUILD_COMMANDS .get (project_slug )
77+
5178 try :
5279 java_version = subprocess .check_output (['java' , '-version' ],
5380 stderr = subprocess .STDOUT ,
@@ -67,8 +94,11 @@ def create_codeql_database(project_slug, env, db_base_path, sources_base_path):
6794 database_path ,
6895 "--source-root" , source_path ,
6996 "--language" , "java" ,
70- "--overwrite"
97+ "--overwrite" ,
7198 ]
99+ if custom_cmd :
100+ print (f"Using custom build command for { project_slug } : { custom_cmd } " )
101+ command .extend (["--command" , custom_cmd ])
72102
73103 try :
74104 print (f"Creating database at: { database_path } " )
@@ -85,11 +115,121 @@ def create_codeql_database(project_slug, env, db_base_path, sources_base_path):
85115 print (f'Stderr Info:\n { e .stderr .decode ()} ' )
86116 raise
87117
118+
def create_codeql_database_in_container(project_slug: str, row: dict, db_base_path: str, verbose: bool = False) -> None:
    """Create the CodeQL database for a project inside its Docker container.

    Steps: make sure the project image is available, start a container, copy
    the CodeQL CLI into it, fetch the project sources at the recorded commit,
    apply ``<DATA_DIR>/patches/<project_slug>.patch`` when that file exists,
    run ``codeql database create`` (with the custom build command for the
    project, if one is configured), and copy the resulting database back to
    the host under ``db_base_path`` as ``<project_slug>-docker``.

    Args:
        project_slug: Project identifier; used to pick the image, look up the
            repository URL/commit, and locate the optional patch file.
        row: Build-info row for the project. Currently unused by this
            function; kept so the call signature stays stable.
        db_base_path: Host directory that receives the finished database.
            Created if it does not exist.
        verbose: Passed as ``stream`` to ``exec_in_container`` for the fetch,
            patch, and database-creation steps.

    Raises:
        RuntimeError: If the slug has no repository entry, or if fetching
            sources, applying the patch, or creating the database exits with
            a non-zero status.
    """
    # Local import: this function is the only place that assembles shell
    # command strings, and a function-scope import keeps the edit
    # self-contained.
    import shlex

    image = parse_project_image(project_slug)  # Parse the project image from the project slug
    ensure_image(image)

    # Add -docker suffix to database name when using container
    db_project_slug = f"{project_slug}-docker"

    # Prepare host and container paths
    host_db_dir = os.path.abspath(db_base_path)
    os.makedirs(host_db_dir, exist_ok=True)
    container_codeql_dir = "/codeql"
    container_codeql_bin = f"{container_codeql_dir}/codeql"
    container_out_base = "/out"
    container_db_dir = f"{container_out_base}/{db_project_slug}"
    container_source_root = "/workspace/repo"

    def get_repo_info_from_project_info(slug: str) -> tuple[str, str]:
        """Return (repo_url, commit_id) for ``slug`` from the project-info CSV."""
        with open(CVES_MAPPED_W_COMMITS_DIR, 'r', newline="") as f:
            reader = csv.reader(f)
            next(reader, None)  # header
            for line in reader:
                # Column 1 holds the slug, column 8 the repository URL and
                # column 10 the commit id; shorter rows cannot match.
                if len(line) > 10 and line[1] == slug:
                    return line[8], line[10]
        raise RuntimeError(f"Project slug '{slug}' not found in project_info.csv")

    container = create_container(image=image, working_dir=container_source_root)
    try:
        container.start()

        # Copy CodeQL CLI into container and ensure out dir exists
        copy_dir_to_container(container, CODEQL_DIR, container_codeql_dir)
        exec_in_container(container, ["bash", "-lc", f"mkdir -p {shlex.quote(container_out_base)}"])

        # Fresh fetch like fetch_one.py: reclone at desired commit
        repo_url, commit_id = get_repo_info_from_project_info(project_slug)
        # Quote values taken from the CSV so unusual characters cannot break
        # or alter the shell command. An empty commit id is left out entirely,
        # which keeps the previous behaviour of fetching the remote default.
        quoted_commit = shlex.quote(commit_id) if commit_id else ""
        fetch_cmd = (
            "rm -rf repo && mkdir -p repo && cd repo && "
            f"git init && git remote add origin {shlex.quote(repo_url)} && "
            f"git fetch --no-tags --depth 1 origin {quoted_commit} && "
            "git reset --hard FETCH_HEAD && "
            "git fetch --no-tags --depth 1 origin '+refs/heads/*:refs/remotes/origin/*'"
        )
        print(f"Refreshing sources from {repo_url} @ {commit_id}")
        code, output = exec_in_container(
            container,
            ["bash", "-lc", fetch_cmd],
            workdir="/workspace",
            stream=verbose,
        )
        if code != 0:
            if output:
                print(output)
            raise RuntimeError(f"Failed to fetch sources for {project_slug}")

        # Apply project patch if available (mirror fetch_one.py behavior)
        patch_dir_host = os.path.join(DATA_DIR, "patches")
        patch_file_host = os.path.join(patch_dir_host, f"{project_slug}.patch")
        if os.path.exists(patch_file_host):
            print(f"Found patch for {project_slug}, applying...")
            # Copy entire patches dir to container to keep logic simple
            copy_dir_to_container(container, patch_dir_host, "/patches")
            container_patch_file = f"/patches/{project_slug}.patch"
            code, output = exec_in_container(
                container,
                ["bash", "-lc", f"git apply {shlex.quote(container_patch_file)}"],
                workdir=container_source_root,
                stream=verbose,
            )
            if code != 0:
                if output:
                    print(output)
                raise RuntimeError(f"Failed to apply patch for {project_slug}")
        else:
            print("No patch found; skipping patching.")

        # Build the CodeQL invocation as an argument list and let shlex.join
        # do the quoting, so the build command reaches CodeQL verbatim (as it
        # does on the non-container path) even when it contains quotes, `$`,
        # or backticks.
        codeql_args = [
            container_codeql_bin, "database", "create", container_db_dir,
            "--source-root", container_source_root,
            "--language", "java",
            "--overwrite",
        ]
        # Prefer custom build command when available
        custom_cmd = CUSTOM_BUILD_COMMANDS.get(project_slug)
        if custom_cmd:
            print(f"Using custom build command for {project_slug}: {custom_cmd}")
            codeql_args.extend(["--command", custom_cmd])
        codeql_cmd = shlex.join(codeql_args)

        print(f"Initializing database at {container_db_dir}.")
        code, output = exec_in_container(
            container,
            ["bash", "-lc", codeql_cmd],
            workdir=container_source_root,
            stream=verbose,
        )

        if code != 0:
            print(f"CodeQL database creation failed for {project_slug}")
            # When verbose, the output is requested as a stream, so it is
            # only printed here in the non-verbose case.
            if not verbose and output:
                print("Error output:")
                print(output)
            raise RuntimeError(f"CodeQL database creation failed in container for {project_slug}")

        print(f"Finalizing database at {container_db_dir}.")
        # Copy database back to host
        copy_from_container(container, container_db_dir, host_db_dir)
        print(f"Successfully created database at {host_db_dir}/{db_project_slug}.")
    finally:
        # Cleanup is best-effort: a failure here must not mask the real
        # outcome, but it should be visible so stale containers can be found.
        try:
            container.remove(force=True)
        except Exception as cleanup_error:
            print(f"Warning: failed to remove container for {project_slug}: {cleanup_error}")
225+
88226def main ():
89227 parser = argparse .ArgumentParser (description = 'Create CodeQL databases for cwe-bench-java projects' )
90228 parser .add_argument ('--project' , help = 'Specific project slug' , default = None )
91229 parser .add_argument ('--db-path' , help = 'Base path for storing CodeQL databases' , default = CODEQL_DB_PATH )
92230 parser .add_argument ('--sources-path' , help = 'Base path for project sources' , default = PROJECT_SOURCE_CODE_DIR )
231+ parser .add_argument ('--use-container' , action = 'store_true' , help = 'Create DB inside the project container using mounted CodeQL' )
232+ parser .add_argument ('--verbose' , action = 'store_true' , help = 'Show verbose output during database creation' )
93233 args = parser .parse_args ()
94234
95235 # Load build information
@@ -98,14 +238,20 @@ def main():
98238 if args .project :
99239 project = next ((p for p in projects if p ['project_slug' ] == args .project ), None )
100240 if project :
101- env = setup_environment (project )
102- create_codeql_database (project ['project_slug' ], env , args .db_path , args .sources_path )
241+ if args .use_container :
242+ create_codeql_database_in_container (project ['project_slug' ], project , args .db_path , args .verbose )
243+ else :
244+ env = setup_environment (project )
245+ create_codeql_database (project ['project_slug' ], env , args .db_path , args .sources_path )
103246 else :
104247 print (f"Project { args .project } not found in CSV file" )
105248 else :
106249 for project in projects :
107- env = setup_environment (project )
108- create_codeql_database (project ['project_slug' ], env , args .db_path , args .sources_path )
250+ if args .use_container :
251+ create_codeql_database_in_container (project ['project_slug' ], project , args .db_path , args .verbose )
252+ else :
253+ env = setup_environment (project )
254+ create_codeql_database (project ['project_slug' ], env , args .db_path , args .sources_path )
109255
110256# Location of build_info_local.csv file
111257LOCAL_BUILD_INFO = os .path .join (DATA_DIR , "build-info" , "build_info_local.csv" )
0 commit comments