Files
agentic-dev/scripts/compliance/emit-sub-processors.mjs
Danijel Martinek 33bac95c41 feat(scripts): add emit-sub-processors compliance script + tests
Adds scripts/compliance/emit-sub-processors.mjs which walks
docs/library-decisions/*.md, filters is-sub-processor: true entries
via frontmatter parsing, merges compliance/sub-processors.manual.yml
(graceful skip if absent), and emits sorted deterministic YAML to
compliance/sub-processors.yml.

- parseFrontmatter: extracts top-level scalars, skips nested blocks
- parseLibraryTraceSubProcessors: discriminated-union filter on
  is-sub-processor flag
- loadManualEntries / parseSimpleYamlList: flat YAML list parser for
  manual entries; injects source: manual
- buildSubProcessors: merge + sort by package name
- renderSubProcessorsYaml: package-first field order, rest alphabetical
- --check and --print modes via shared unifiedDiff from emit-data-map
- 39 unit tests across all exported functions

Wires compliance:sub-processors root package script.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-18 20:01:06 +00:00

340 lines
9.4 KiB
JavaScript

#!/usr/bin/env node
/**
* emit-sub-processors.mjs — Third-party sub-processor inventory emitter.
*
* Walks docs/library-decisions/*.md, filters entries where is-sub-processor: true,
* merges compliance/sub-processors.manual.yml (if present) with source: manual flag,
* and emits a sorted deterministic YAML inventory at compliance/sub-processors.yml.
*
* Usage:
* node scripts/compliance/emit-sub-processors.mjs # write compliance/sub-processors.yml
* node scripts/compliance/emit-sub-processors.mjs --print # write to stdout
* node scripts/compliance/emit-sub-processors.mjs --check # diff vs committed file; exit 1 on mismatch
*/
import fs from "node:fs";
import path from "node:path";
import { pathToFileURL } from "node:url";
import { unifiedDiff, REPO_ROOT } from "./emit-data-map.mjs";
export { unifiedDiff, REPO_ROOT };
// Relative path of the generated inventory. main() resolves it against the
// current working directory and also uses it verbatim in its status messages.
export const OUTPUT_PATH = "compliance/sub-processors.yml";
// Relative path of the optional hand-maintained entries file read by
// loadManualEntries(); a missing file yields no manual entries.
export const MANUAL_PATH = "compliance/sub-processors.manual.yml";
// ---- Frontmatter parser ----
/** Parse a YAML scalar value string into its JS equivalent. */
function parseScalarValue(raw) {
if (raw === "true") return true;
if (raw === "false") return false;
if (raw === "null") return null;
if (
(raw.startsWith('"') && raw.endsWith('"')) ||
(raw.startsWith("'") && raw.endsWith("'"))
) {
return raw.slice(1, -1);
}
return raw;
}
/**
 * Read the top-level scalar fields of a markdown file's YAML frontmatter.
 *
 * Only `key: value` lines that start in the first column are used. Blank
 * lines, lines beginning with `#`, lines beginning with a space or tab
 * (children of nested blocks), lines without a colon, and keys with no inline
 * value (block headers) are ignored.
 *
 * @param {string} src - Full text of the markdown file.
 * @returns {Object|null} Map of key to parsed value, or null when the text
 *   does not start with a `---` delimited frontmatter section.
 */
export function parseFrontmatter(src) {
  const frontmatter = /^---\r?\n([\s\S]*?)\r?\n---/.exec(src);
  if (frontmatter === null) return null;

  const fields = {};
  frontmatter[1].split("\n").forEach((row) => {
    const isBlank = row.trim() === "";
    const isComment = row.startsWith("#");
    const isNested = row.startsWith(" ") || row.startsWith("\t");
    if (isBlank || isComment || isNested) return;

    const sep = row.indexOf(":");
    if (sep < 0) return;

    const valueText = row.slice(sep + 1).trim();
    if (valueText === "") return; // block header like "filter-results:"

    const name = row.slice(0, sep).trim();
    fields[name] = parseScalarValue(valueText);
  });
  return fields;
}
// ---- Library decision file discovery ----
/**
 * List the library decision documents in `<repoRoot>/docs/library-decisions`.
 *
 * Only names ending in ".md" are returned, and names beginning with "_"
 * (such as _template.md) are left out. Directory entries are sorted with the
 * default string ordering before filtering.
 *
 * @param {string} [repoRoot=REPO_ROOT] - Directory containing `docs/`.
 * @returns {string[]} Paths (decisions directory joined with each name), or
 *   an empty array when the directory does not exist.
 */
export function findLibraryDecisionFiles(repoRoot = REPO_ROOT) {
  const decisionsDir = path.join(repoRoot, "docs", "library-decisions");
  if (!fs.existsSync(decisionsDir)) {
    return [];
  }

  const found = [];
  for (const name of fs.readdirSync(decisionsDir).sort()) {
    const isMarkdown = name.endsWith(".md");
    const isUnderscored = name.startsWith("_");
    if (isMarkdown && !isUnderscored) {
      found.push(path.join(decisionsDir, name));
    }
  }
  return found;
}
// ---- Sub-processor parsing from library traces ----
// Frontmatter keys copied onto a library-trace entry, in insertion order.
const TRACE_FIELDS = [
  "package",
  "version",
  "decision",
  "data-sent",
  "region",
  "dpa-signed",
  "sccs-required",
  "contact",
];

/**
 * Collect sub-processor entries from the library decision files.
 *
 * A file contributes an entry only when its frontmatter has
 * `is-sub-processor` parsed as the boolean `true`. The entry starts with
 * `source: "library-trace"` and then carries every TRACE_FIELDS key whose
 * frontmatter value is not undefined. Files that cannot be read are skipped.
 *
 * @param {string} [repoRoot=REPO_ROOT] - Root passed to findLibraryDecisionFiles.
 * @returns {Object[]} Entries in the order the files were listed.
 */
export function parseLibraryTraceSubProcessors(repoRoot = REPO_ROOT) {
  return findLibraryDecisionFiles(repoRoot).flatMap((decisionFile) => {
    let text;
    try {
      text = fs.readFileSync(decisionFile, "utf8");
    } catch {
      return []; // unreadable file: contributes nothing
    }

    const frontmatter = parseFrontmatter(text);
    if (!frontmatter || frontmatter["is-sub-processor"] !== true) {
      return [];
    }

    const record = { source: "library-trace" };
    TRACE_FIELDS.forEach((name) => {
      const value = frontmatter[name];
      if (value !== undefined) record[name] = value;
    });
    return [record];
  });
}
// ---- Manual entries ----
/**
 * Parse a flat YAML list of objects.
 *
 * A line beginning with "- " in the first column opens a new item and may
 * carry its first `key: value` pair; following lines that begin with a space
 * or tab add further pairs to the open item. Blank lines, `#` comment lines,
 * unindented non-item lines, and pairs with an empty value are ignored.
 *
 * @param {string} src - YAML text.
 * @returns {Object[]} One plain object per list item, in document order.
 */
export function parseSimpleYamlList(src) {
  // Store the "key: value" pair found in `text` on `target`, if there is one.
  const assignPair = (target, text) => {
    const sep = text.indexOf(":");
    if (sep === -1) return;
    const valueText = text.slice(sep + 1).trim();
    if (valueText === "") return;
    target[text.slice(0, sep).trim()] = parseScalarValue(valueText);
  };

  const items = [];
  let open = null;
  for (const row of src.split("\n")) {
    const body = row.trim();
    if (body === "" || body.startsWith("#")) continue;

    if (row.startsWith("- ")) {
      if (open !== null) items.push(open);
      open = {};
      assignPair(open, row.slice(2));
      continue;
    }

    const isIndented = row.startsWith(" ") || row.startsWith("\t");
    if (open !== null && isIndented) assignPair(open, row);
  }
  if (open !== null) items.push(open);
  return items;
}
/**
 * Read the hand-maintained entries from MANUAL_PATH under `repoRoot`.
 *
 * Every parsed entry is copied with `source` set to "manual", replacing any
 * `source` the file itself supplied. A missing file, or one that cannot be
 * read, yields an empty array.
 *
 * @param {string} [repoRoot=REPO_ROOT] - Directory MANUAL_PATH is resolved against.
 * @returns {Object[]} Manual entries in file order.
 */
export function loadManualEntries(repoRoot = REPO_ROOT) {
  const manualFile = path.resolve(repoRoot, MANUAL_PATH);
  if (!fs.existsSync(manualFile)) {
    return [];
  }

  let text;
  try {
    text = fs.readFileSync(manualFile, "utf8");
  } catch {
    return [];
  }

  const tagged = [];
  for (const item of parseSimpleYamlList(text)) {
    tagged.push({ ...item, source: "manual" });
  }
  return tagged;
}
// ---- Builder ----
/**
 * Merge library-trace and manual sub-processor entries into one sorted list.
 *
 * Entries are ordered by package name; a missing package sorts as the empty
 * string. The comparison pins the "en" collation: without an explicit locale,
 * localeCompare() uses the runtime's default locale, so the generated file
 * (and therefore `--check`) could differ between machines. Entries with the
 * same package keep their input order (traced before manual).
 *
 * @param {Object[]} traced - Entries derived from library decision files.
 * @param {Object[]} manual - Entries from the manual file.
 * @returns {Object[]} A new sorted array; the inputs are not modified.
 */
export function buildSubProcessors(traced, manual) {
  const sortKey = (entry) => String(entry.package ?? "");
  return [...traced, ...manual].sort((a, b) =>
    sortKey(a).localeCompare(sortKey(b), "en"),
  );
}
// ---- YAML serialization ----
// Comment banner placed at the top of the generated YAML (no trailing newline).
const YAML_HEADER =
  "# compliance/sub-processors.yml — Third-party sub-processor inventory\n" +
  "# Generated by scripts/compliance/emit-sub-processors.mjs — do not edit manually.\n" +
  "# Run `pnpm compliance:sub-processors` to regenerate.";
// Plain words that YAML parsers may resolve to a boolean or null, lower-cased.
const YAML_RESERVED_WORDS = new Set([
  "true",
  "false",
  "null",
  "yes",
  "no",
  "on",
  "off",
  "~",
]);
// Text a YAML parser would read as an int or float (decimal, hex, octal, inf, nan).
const YAML_NUMBER_LIKE =
  /^(?:[-+]?(?:\.\d+|\d[\d_]*(?:\.[\d_]*)?)(?:e[-+]?\d+)?|0x[\da-f_]+|0o[0-7_]+|[-+]?\.inf|\.nan)$/i;

/**
 * Render a string as a YAML scalar, double-quoting it only when a plain
 * scalar would be misread.
 *
 * Quoting is applied to: the empty string; boolean/null words in any letter
 * case (so a region code such as "NO" stays a string); number-like text (so a
 * version such as "1.10" is not read as the float 1.1); text containing a
 * YAML indicator character; text with leading or trailing whitespace; and
 * text starting with a "- " or "? " block indicator. Inside the quotes,
 * backslashes and double quotes are escaped.
 *
 * @param {*} s - Value to render; non-strings are returned via String(s).
 * @returns {string} YAML text for the scalar.
 */
function yamlStr(s) {
  if (typeof s !== "string") return String(s);
  const needsQuotes =
    s === "" ||
    YAML_RESERVED_WORDS.has(s.toLowerCase()) ||
    YAML_NUMBER_LIKE.test(s) ||
    /[[{},:#&*!|>'"%@`\]]/u.test(s) ||
    /^\s|\s$/.test(s) ||
    /^[-?](?:\s|$)/.test(s);
  if (!needsQuotes) return s;
  return `"${s.replace(/\\/g, "\\\\").replace(/"/g, '\\"')}"`;
}
// package first (natural identifier), then remaining fields alphabetically
const ENTRY_FIELDS = [
  "package",
  "contact",
  "data-sent",
  "decision",
  "dpa-signed",
  "region",
  "sccs-required",
  "source",
  "version",
];

/**
 * Render sub-processor entries as deterministic YAML.
 *
 * The output is the header comment, a `sub-processors:` key, and one list
 * item per entry with two-space block indentation: the first emitted field
 * follows "  - " and the remaining fields are indented four spaces so they
 * line up with it (a mapping's keys must share one column to parse). An
 * empty list is written as `[]`. Fields appear in ENTRY_FIELDS order; keys
 * not listed there and undefined values are omitted. Entries are rendered in
 * the order given.
 *
 * null, booleans and numbers are written bare so they keep their type;
 * everything else is stringified and passed through yamlStr().
 *
 * @param {Object[]} entries - Entries to render.
 * @returns {string} YAML document text ending in a newline.
 */
export function renderSubProcessorsYaml(entries) {
  const lines = [YAML_HEADER, "sub-processors:"];
  if (entries.length === 0) {
    lines.push("  []");
  }
  for (const entry of entries) {
    let prefix = "  - ";
    for (const field of ENTRY_FIELDS) {
      const value = entry[field];
      if (value === undefined) continue;
      // A parsed `null` must stay YAML null, not become the string "null".
      const isBareScalar =
        value === null ||
        typeof value === "boolean" ||
        typeof value === "number";
      const rendered = isBareScalar ? String(value) : yamlStr(String(value));
      lines.push(`${prefix}${field}: ${rendered}`);
      prefix = "    "; // later keys align with the first key after "  - "
    }
  }
  return `${lines.join("\n")}\n`;
}
// ---- CLI ----
/**
 * Read the CLI flags from a process.argv-style array.
 *
 * The first two elements are skipped. `--print` and `--check` select the mode
 * (the last one given wins), `--help` / `-h` prints usage and exits with
 * status 0, and any other argument is ignored.
 *
 * @param {string[]} argv - Argument vector, e.g. process.argv.
 * @returns {{mode: string}} Mode is "write" (default), "print" or "check".
 */
function parseArgs(argv) {
  const usage = [
    "Usage: node scripts/compliance/emit-sub-processors.mjs [--print | --check]",
    " (default): write compliance/sub-processors.yml",
    " --print: write YAML to stdout",
    " --check: diff vs committed file; exit 1 on mismatch",
  ].join("\n");

  const parsed = { mode: "write" };
  for (const arg of argv.slice(2)) {
    switch (arg) {
      case "--print":
        parsed.mode = "print";
        break;
      case "--check":
        parsed.mode = "check";
        break;
      case "--help":
      case "-h":
        console.log(usage);
        process.exit(0);
        break;
      default:
        break;
    }
  }
  return parsed;
}
/**
 * CLI entry point.
 *
 * Builds the inventory YAML using the current working directory as the repo
 * root, then acts on the selected mode:
 * - print: write the YAML to stdout and return;
 * - check: compare with the file at OUTPUT_PATH and exit 0 when identical,
 *   or exit 1 when the file is missing or differs (the diff goes to stderr);
 * - write (default): create the output directory if needed and write the file.
 */
function main() {
  const { mode } = parseArgs(process.argv);
  const repoRoot = process.cwd();
  const yaml = renderSubProcessorsYaml(
    buildSubProcessors(
      parseLibraryTraceSubProcessors(repoRoot),
      loadManualEntries(repoRoot),
    ),
  );
  const outPath = path.resolve(repoRoot, OUTPUT_PATH);

  if (mode === "print") {
    process.stdout.write(yaml);
    return;
  }

  if (mode === "check") {
    const hasCommittedFile = fs.existsSync(outPath);
    if (!hasCommittedFile) {
      process.stderr.write(
        `[compliance:sub-processors] --check: no committed file at ${OUTPUT_PATH}\nRun \`pnpm compliance:sub-processors\` to generate it first.\n`,
      );
      process.exit(1);
    }
    const diff = unifiedDiff(
      fs.readFileSync(outPath, "utf8"),
      yaml,
      OUTPUT_PATH,
    );
    const isUpToDate = diff === null;
    if (isUpToDate) {
      console.log(`✓ compliance:sub-processors — ${OUTPUT_PATH} is up to date`);
      process.exit(0);
    }
    process.stderr.write(
      `✗ compliance:sub-processors — ${OUTPUT_PATH} is out of date:\n${diff}\n`,
    );
    process.exit(1);
  }

  // Write mode: make sure the target directory exists before writing.
  const outDir = path.dirname(outPath);
  const outDirExists = fs.existsSync(outDir);
  if (!outDirExists) {
    fs.mkdirSync(outDir, { recursive: true });
  }
  fs.writeFileSync(outPath, yaml, "utf8");
  console.log(`✓ compliance:sub-processors — wrote ${OUTPUT_PATH}`);
}
// Run the CLI only when this module is the script Node was started with, not
// when it is imported (for example by the unit tests). pathToFileURL()
// percent-encodes the path and handles Windows drive letters, which building
// the URL by hand as "file://" + path does not, so the hand-built form fails
// to match for paths containing spaces or on Windows. The undefined check
// covers launches where no script path is present in argv.
if (
  process.argv[1] !== undefined &&
  import.meta.url === pathToFileURL(process.argv[1]).href
) {
  main();
}