dist: add support for configurable file permissions

Currently, dist copies files from Bazel's output
directory to dist_dir with shutil.copyfile, which drops
the source permissions; the copies get the default mode
allowed by the umask (typically 644).

If we simply preserve Bazel's output file permissions,
all files have mode 555 due to an upstream Bazel bug:
https://github.com/bazelbuild/bazel/issues/5588.

Some files in the distribution directory should be
executable and others should be readable/writable.
This adds the ability for the user to choose either
Bazel's default permissions or the umask-derived defaults,
as well as the option to apply permissions based on
filename glob-matching.

Bug: 244215515
Change-Id: Ic04cb38a0723221b25c5872cc213818bf08c8941
Signed-off-by: John Moon <quic_johmoo@quicinc.com>
diff --git a/dist/dist.bzl b/dist/dist.bzl
index bab387e..cb60d99 100644
--- a/dist/dist.bzl
+++ b/dist/dist.bzl
@@ -67,6 +67,7 @@
         dist_dir = None,
         wipe_dist_dir = None,
         allow_duplicate_filenames = None,
+        mode_overrides = None,
         log = None,
         **kwargs):
     """A dist rule to copy files out of Bazel's output directory into a custom location.
@@ -99,7 +100,7 @@
           under workspace root when the target is executed with `bazel run`.
 
           By default, the script will overwrite any files of the same name in `dist_dir`, but preserve
-          any other contents there. This can be overriden with `wipe_dist_dir`.
+          any other contents there. This can be overridden with `wipe_dist_dir`.
 
           See details by running the target with `--help`.
         wipe_dist_dir: If true, and `dist_dir` already exists, `dist_dir` will be removed prior to
@@ -112,6 +113,24 @@
 
           Use of this option is discouraged. Preferably, the input `data` targets would not include labels
           which produce a duplicate filename. This option is available as a last resort.
+        mode_overrides: Map of glob patterns to permissions given as octal digits (e.g. `755`, not `0o755`).
+          If the file path being copied matches the glob pattern, the corresponding permissions will be set
+          in `dist_dir`. Matching uses the full file path, relative to the workspace root, even if `flat = True`.
+
+          Order matters; for each file the overrides are checked in the order given and the first matching
+          pattern wins. To prevent buildifier from sorting the list, use the `# do not sort` magic line. For example:
+          ```
+          mode_overrides = {
+              # do not sort
+              "**/*.sh": 755,
+              "**/hello_world": 755,
+              "restricted_dir/**": 600,
+              "common/kernel_aarch64/vmlinux": 755,
+              "**/*": 644,
+          },
+          ```
+
+          If no `mode_overrides` are provided, the default Bazel output permissions are preserved.
         log: If specified, `--log <log>` is provided to the script by default. This sets the
           default log level of the script.
 
@@ -139,6 +158,9 @@
         default_args.append("--wipe_dist_dir")
     if allow_duplicate_filenames:
         default_args.append("--allow_duplicate_filenames")
+    if mode_overrides != None:
+        for (pattern, mode) in mode_overrides.items():
+            default_args += ["--mode_override", pattern, str(mode)]
     if log != None:
         default_args += ["--log", log]
 
diff --git a/dist/dist.py b/dist/dist.py
index c384664..5f96f07 100644
--- a/dist/dist.py
+++ b/dist/dist.py
@@ -33,6 +33,7 @@
 
 import argparse
 import collections
+import fnmatch
 import glob
 import logging
 import os
@@ -40,6 +41,21 @@
 import sys
 import tarfile
 
+
+def copy_with_modes(src, dst, mode_overrides):
+    mode_override = None
+    for (pattern, mode) in mode_overrides:
+        if fnmatch.fnmatch(src, pattern):
+            mode_override = mode
+            break
+
+    # Copy the file with copy2 to preserve whatever permissions are set on src
+    shutil.copy2(os.path.abspath(src), dst, follow_symlinks=True)
+
+    if mode_override:
+        os.chmod(dst, mode_override)
+
+
 def ensure_unique_filenames(files):
     basename_to_srcs_map = collections.defaultdict(list)
     for f in files:
@@ -71,7 +87,7 @@
     return files_to_dist
 
 
-def copy_files_to_dist_dir(files, archives, dist_dir, flat, prefix,
+def copy_files_to_dist_dir(files, archives, mode_overrides, dist_dir, flat, prefix,
     strip_components, archive_prefix, wipe_dist_dir, allow_duplicate_filenames, **ignored):
 
     if flat and not allow_duplicate_filenames:
@@ -91,7 +107,6 @@
             src_relpath = src
 
         src_relpath = os.path.join(prefix, src_relpath)
-        src_abspath = os.path.abspath(src)
 
         dst = os.path.join(dist_dir, src_relpath)
         if os.path.isfile(src):
@@ -100,10 +115,15 @@
             if not os.path.exists(dst_dirname):
                 os.makedirs(dst_dirname)
 
-            shutil.copyfile(src_abspath, dst, follow_symlinks=True)
+            copy_with_modes(src, dst, mode_overrides)
         elif os.path.isdir(src):
             logging.debug("Copying dir: %s" % dst)
-            shutil.copytree(src_abspath, dst, copy_function=shutil.copyfile, dirs_exist_ok=True)
+            shutil.copytree(
+                os.path.abspath(src),
+                dst,
+                copy_function=lambda s, d: copy_with_modes(s, d, mode_overrides),
+                dirs_exist_ok=True,
+            )
 
     for archive in archives:
         try:
@@ -163,9 +183,25 @@
         action="store_true",
         help="allow multiple files with the same name to be copied to dist_dir (overwriting)"
     )
+    parser.add_argument(
+        "--mode_override",
+        metavar=("PATTERN", "MODE"),
+        action="append",
+        nargs=2,
+        default=[],
+        help='glob pattern and mode to set on files matching pattern (e.g. --mode_override "*.sh" "755")'
+    )
 
     args = parser.parse_args(sys.argv[1:])
 
+    mode_overrides = []
+    for (pattern, mode) in args.mode_override:
+        try:
+            mode_overrides.append((pattern, int(mode, 8)))
+        except ValueError:
+            logging.error("invalid octal permissions: %s", mode)
+            sys.exit(1)
+
     config_logging(args.log)
 
     if not os.path.isabs(args.dist_dir):
@@ -177,7 +213,7 @@
 
     files = files_to_dist("*_dist_manifest.txt")
     archives = files_to_dist("*_dist_archives_manifest.txt")
-    copy_files_to_dist_dir(files, archives, **vars(args))
+    copy_files_to_dist_dir(files, archives, mode_overrides, **vars(args))
 
 
 if __name__ == "__main__":