blob: b7e9ecf16e5699cd158f91b7b0fd898d0d5901b2 [file] [log] [blame]
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (C) Google LLC, 2018
#
# Author: Tom Roeder <tmroeder@google.com>
#
"""A tool for generating compile_commands.json in the Linux kernel."""
9
import argparse
import json
import logging
import os
import re
import subprocess
import sys
Tom Roederb3020462018-12-18 14:49:07 -080016
# Default path of the generated compilation database (used for --output).
_DEFAULT_OUTPUT = 'compile_commands.json'
# Default value of the --log_level option.
_DEFAULT_LOG_LEVEL = 'WARNING'

# Matches the base name of a command file: starts with '.' and ends in '.cmd'.
_FILENAME_PATTERN = r'^\..*\.cmd$'
# Matches a line of the form "cmd_<name>.o := <command> <file>.c".
# Group 1 is the command up to and including the space before the last word;
# group 2 is the trailing .c file.
_LINE_PATTERN = r'^cmd_[^ ]*\.o := (.* )([^ ]*\.c)$'
# Accepted values of the --log_level option.
_VALID_LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
# The tools/ directory adopts a different build system, and produces .cmd
# files in a different format. Do not support it.
_EXCLUDE_DIRS = ['.git', 'Documentation', 'include', 'tools']
Tom Roederb3020462018-12-18 14:49:07 -080026
def parse_arguments(argv=None):
    """Sets up and parses command-line arguments.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default), argparse reads the arguments from sys.argv[1:].

    Returns:
        A tuple (log_level, directory, output, ar, paths), in this order:

        log_level: A logging level to filter log output.
        directory: The work directory where the objects were built, as an
            absolute path.
        output: Where to write the compile-commands JSON file.
        ar: Command used for parsing .a archives.
        paths: The list of files/directories to handle to find .cmd files.
            When no path is given, this is a one-element list holding the
            --directory value exactly as it was passed (not made absolute).
    """
    usage = 'Creates a compile_commands.json database from kernel .cmd files'
    parser = argparse.ArgumentParser(description=usage)

    directory_help = ('specify the output directory used for the kernel build '
                      '(defaults to the working directory)')
    parser.add_argument('-d', '--directory', type=str, default='.',
                        help=directory_help)

    output_help = ('path to the output command database (defaults to ' +
                   _DEFAULT_OUTPUT + ')')
    parser.add_argument('-o', '--output', type=str, default=_DEFAULT_OUTPUT,
                        help=output_help)

    log_level_help = ('the level of log messages to produce (defaults to ' +
                      _DEFAULT_LOG_LEVEL + ')')
    parser.add_argument('--log_level', choices=_VALID_LOG_LEVELS,
                        default=_DEFAULT_LOG_LEVEL, help=log_level_help)

    ar_help = 'command used for parsing .a archives'
    parser.add_argument('-a', '--ar', type=str, default='llvm-ar', help=ar_help)

    paths_help = ('directories to search or files to parse '
                  '(files should be *.o, *.a, or modules.order). '
                  'If nothing is specified, the current directory is searched')
    parser.add_argument('paths', type=str, nargs='*', help=paths_help)

    # parse_args(None) is equivalent to parse_args(): it uses sys.argv[1:].
    args = parser.parse_args(argv)

    return (args.log_level,
            os.path.abspath(args.directory),
            args.output,
            args.ar,
            # An empty positional list means "search the build directory".
            args.paths or [args.directory])
Masahiro Yamadafc2cb222020-08-22 23:56:14 +090070
71
def cmdfiles_in_dir(directory):
    """Iterate over the .cmd files located below a directory.

    The tree rooted at the given directory is walked top-down. The
    directories named in _EXCLUDE_DIRS, taken relative to the given
    directory, are not searched.

    Args:
        directory: The directory to search for .cmd files.

    Yields:
        The path to a .cmd file.
    """
    is_cmdfile = re.compile(_FILENAME_PATTERN).match
    pruned = {os.path.join(directory, name) for name in _EXCLUDE_DIRS}

    for current, subdirs, files in os.walk(directory, topdown=True):
        if current in pruned:
            # Emptying the list in place keeps os.walk from descending, and
            # the files directly inside the excluded directory are skipped.
            del subdirs[:]
            continue

        yield from (os.path.join(current, name)
                    for name in files if is_cmdfile(name))
Tom Roederb3020462018-12-18 14:49:07 -080096
97
def to_cmdfile(path):
    """Return the path of the .cmd file that belongs to a build artifact.

    The .cmd file sits in the same directory as the artifact and is named
    after it, with a leading '.' and a trailing '.cmd'.

    Args:
        path: Path of the build artifact

    Returns:
        The path to the .cmd file
    """
    head, tail = os.path.split(path)
    cmd_name = '.{}.cmd'.format(tail)
    return os.path.join(head, cmd_name)
109
110
def cmdfiles_for_o(obj):
    """Iterate over the .cmd files that belong to a single object file.

    Exactly one path is produced: the .cmd file of the given object.

    Args:
        obj: The object path

    Yields:
        The path to the .cmd file
    """
    cmdfile = to_cmdfile(obj)
    yield cmdfile
123
124
def cmdfiles_for_a(archive, ar):
    """Iterate over the .cmd files of the members of an archive.

    The member list is obtained by running "<ar> -t <archive>"; its output
    is split on whitespace and each resulting name is mapped to its .cmd
    file.

    Args:
        archive: The archive to parse
        ar: Command used to list the archive members

    Yields:
        The path to every .cmd file found
    """
    listing = subprocess.check_output([ar, '-t', archive])
    members = listing.decode().split()
    for member in members:
        yield to_cmdfile(member)
138
139
def cmdfiles_for_modorder(modorder):
    """Generate the iterator of .cmd files associated with the modules.order.

    Parse the given modules.order, and yield every .cmd file used to build the
    contained modules. Each line names a .ko file; the object list is read
    from the first line of the matching .mod file (same path with the .ko
    extension replaced by .mod).

    Note: this relies on the file-level "import sys"; without it the error
    path below fails with NameError instead of exiting with the message.

    Args:
        modorder: The modules.order file to parse

    Yields:
        The path to every .cmd file found

    Raises:
        SystemExit: A line of modules.order does not end with .ko.
    """
    with open(modorder) as f:
        for line in f:
            ko = line.rstrip()
            base, ext = os.path.splitext(ko)
            if ext != '.ko':
                sys.exit('{}: module path must end with .ko'.format(ko))
            mod = base + '.mod'
            # The first line of *.mod lists the objects that compose the module.
            with open(mod) as m:
                for obj in m.readline().split():
                    yield to_cmdfile(obj)
163
164
def process_line(root_directory, command_prefix, file_path):
    """Extracts information from a .cmd line and creates an entry from it.

    Args:
        root_directory: The directory that was searched for .cmd files. Usually
            used directly in the "directory" entry in compile_commands.json.
        command_prefix: The extracted command line, up to the last element.
        file_path: The .c file from the end of the extracted command.
            Usually relative to root_directory, but sometimes absolute.

    Returns:
        An entry to append to compile_commands: a dict with the keys
        'directory' (root_directory), 'file' (the normalized absolute path of
        the source file) and 'command' (the unescaped command_prefix followed
        by file_path as given).

    Raises:
        ValueError: Could not find the extracted file based on file_path and
            root_directory.
    """
    # The .cmd files are intended to be included directly by Make, so they
    # escape the pound sign '#', either as '\#' or '$(pound)' (depending on the
    # kernel version). The compile_commands.json file is not interpreted
    # by Make, so this code replaces the escaped version with '#'.
    # The backslash is doubled because '\#' is not a valid string escape and
    # triggers a warning on recent Python versions.
    prefix = command_prefix.replace('\\#', '#').replace('$(pound)', '#')

    # Use os.path.abspath() to normalize the path resolving '.' and '..' .
    # If file_path is absolute, os.path.join() discards root_directory.
    abs_path = os.path.abspath(os.path.join(root_directory, file_path))
    if not os.path.exists(abs_path):
        raise ValueError('File %s not found' % abs_path)
    return {
        'directory': root_directory,
        'file': abs_path,
        'command': prefix + file_path,
    }
197
198
def main():
    """Walks through the directory and finds and parses .cmd files.

    For every path from the command line, the matching .cmd files are
    collected, the first line of each is matched against _LINE_PATTERN, and
    every match is turned into a compile_commands entry. The entries are then
    written to the output file as JSON.

    Note: this relies on the file-level "import sys"; without it the
    unknown-file-type branch fails with NameError instead of exiting with the
    message.

    Raises:
        SystemExit: A path is neither a directory nor a *.o, *.a or
            modules.order file.
    """
    log_level, directory, output, ar, paths = parse_arguments()

    level = getattr(logging, log_level)
    logging.basicConfig(format='%(levelname)s: %(message)s', level=level)

    line_matcher = re.compile(_LINE_PATTERN)

    compile_commands = []

    for path in paths:
        # If 'path' is a directory, handle all .cmd files under it.
        # Otherwise, handle .cmd files associated with the file.
        # Most of built-in objects are linked via archives (built-in.a or lib.a)
        # but some objects are linked to vmlinux directly.
        # Modules are listed in modules.order.
        if os.path.isdir(path):
            cmdfiles = cmdfiles_in_dir(path)
        elif path.endswith('.o'):
            cmdfiles = cmdfiles_for_o(path)
        elif path.endswith('.a'):
            cmdfiles = cmdfiles_for_a(path, ar)
        elif path.endswith('modules.order'):
            cmdfiles = cmdfiles_for_modorder(path)
        else:
            sys.exit('{}: unknown file type'.format(path))

        for cmdfile in cmdfiles:
            with open(cmdfile, 'rt') as f:
                # Only the first line of a .cmd file is examined.
                result = line_matcher.match(f.readline())
                if result:
                    try:
                        entry = process_line(directory, result.group(1),
                                             result.group(2))
                        compile_commands.append(entry)
                    except ValueError as err:
                        # The source file could not be found; skip the entry.
                        logging.info('Could not add line from %s: %s',
                                     cmdfile, err)

    with open(output, 'wt') as f:
        json.dump(compile_commands, f, indent=2, sort_keys=True)
241
Tom Roederb3020462018-12-18 14:49:07 -0800242
# Run the generator only when this file is executed as a script.
if __name__ == '__main__':
    main()