Adds scripts/compliance/emit-sub-processors.mjs, which walks docs/library-decisions/*.md, keeps the entries whose frontmatter sets is-sub-processor: true, merges compliance/sub-processors.manual.yml (skipped gracefully if absent), and emits sorted, deterministic YAML to compliance/sub-processors.yml.

- parseFrontmatter: extracts top-level scalars, skips nested blocks
- parseLibraryTraceSubProcessors: discriminated-union filter on the is-sub-processor flag
- loadManualEntries / parseSimpleYamlList: flat YAML list parser for manual entries; injects source: manual
- buildSubProcessors: merge + sort by package name
- renderSubProcessorsYaml: package-first field order, rest alphabetical
- --check and --print modes via the shared unifiedDiff from emit-data-map
- 39 unit tests across all exported functions

Wires the compliance:sub-processors root package script.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
340 lines
9.4 KiB
JavaScript
340 lines
9.4 KiB
JavaScript
#!/usr/bin/env node
|
|
/**
 * emit-sub-processors.mjs — Third-party sub-processor inventory emitter.
 *
 * Walks docs/library-decisions/*.md, filters entries where is-sub-processor: true,
 * merges compliance/sub-processors.manual.yml (if present) with source: manual flag,
 * and emits a sorted deterministic YAML inventory at compliance/sub-processors.yml.
 *
 * Usage:
 *   node scripts/compliance/emit-sub-processors.mjs          # write compliance/sub-processors.yml
 *   node scripts/compliance/emit-sub-processors.mjs --print  # write to stdout
 *   node scripts/compliance/emit-sub-processors.mjs --check  # diff vs committed file; exit 1 on mismatch
 */
|
|
|
|
import fs from "node:fs";
import path from "node:path";
import { pathToFileURL } from "node:url";

import { unifiedDiff, REPO_ROOT } from "./emit-data-map.mjs";
|
|
|
|
export { unifiedDiff, REPO_ROOT };
|
|
|
|
/** Repo-relative path of the generated inventory file. */
export const OUTPUT_PATH = "compliance/sub-processors.yml";
/** Repo-relative path of the optional hand-maintained entries file. */
export const MANUAL_PATH = "compliance/sub-processors.manual.yml";
|
|
|
|
// ---- Frontmatter parser ----
|
|
|
|
/**
 * Parse a YAML scalar value string into its JS equivalent.
 *
 * Only the literals `true`, `false` and `null` and values wrapped in a
 * matching pair of single or double quotes are decoded. Everything else —
 * including numbers — is returned unchanged as a string. Escape sequences
 * inside quotes are not interpreted.
 *
 * @param {string} raw - Scalar text (callers pass it already trimmed).
 * @returns {string | boolean | null} The decoded value.
 */
function parseScalarValue(raw) {
  if (raw === "true") return true;
  if (raw === "false") return false;
  if (raw === "null") return null;

  // Require two characters so a lone quote is not treated as both the opening
  // and the closing quote (which would turn `"` into an empty string).
  if (raw.length >= 2) {
    const opening = raw[0];
    const closing = raw[raw.length - 1];
    if ((opening === '"' || opening === "'") && opening === closing) {
      return raw.slice(1, -1);
    }
  }

  return raw;
}
|
|
|
|
/**
 * Parse top-level scalar fields from YAML frontmatter in a markdown file.
 *
 * The frontmatter must open with `---` on the very first line (a leading
 * byte-order mark is tolerated) and is closed by the next line that begins
 * with `---`. Comment lines, blank lines, indented lines (nested block
 * fields) and keys without an inline value are skipped.
 *
 * @param {string} src - Full text of the markdown file.
 * @returns {Record<string, string | boolean | null> | null} Key → value pairs,
 *   or null if no frontmatter is found.
 */
export function parseFrontmatter(src) {
  // Tolerate a leading byte-order mark (U+FEFF); without this the opening
  // fence would not match at the start of the input and the file would be
  // treated as having no frontmatter at all.
  const match = /^\uFEFF?---\r?\n([\s\S]*?)\r?\n---/.exec(src);
  if (!match) return null;

  const result = {};
  for (const line of match[1].split(/\r?\n/)) {
    if (!line.trim() || line.startsWith("#")) continue;
    // Indented lines belong to a nested block; only top-level keys are read.
    if (line.startsWith(" ") || line.startsWith("\t")) continue;

    // Split on the first colon only, so values such as URLs stay intact.
    const colonIdx = line.indexOf(":");
    if (colonIdx === -1) continue;

    const rawValue = line.slice(colonIdx + 1).trim();
    if (!rawValue) continue; // block header like "filter-results:"

    const key = line.slice(0, colonIdx).trim();
    result[key] = parseScalarValue(rawValue);
  }
  return result;
}
|
|
|
|
// ---- Library decision file discovery ----
|
|
|
|
/**
 * Find all library decision markdown files under docs/library-decisions/.
 *
 * Only direct children of the directory are considered. Files must end in
 * `.md`; `_template.md` and any other name starting with `_` are excluded.
 *
 * @param {string} [repoRoot=REPO_ROOT] - Directory that contains `docs/`.
 * @returns {string[]} Matching paths (each joined onto `repoRoot`), sorted by
 *   file name; empty if the directory does not exist.
 */
export function findLibraryDecisionFiles(repoRoot = REPO_ROOT) {
  const dir = path.join(repoRoot, "docs", "library-decisions");
  if (!fs.existsSync(dir)) return [];

  return fs
    .readdirSync(dir)
    .filter((name) => name.endsWith(".md") && !name.startsWith("_"))
    .sort() // default code-unit ordering: independent of the host locale
    .map((name) => path.join(dir, name));
}
|
|
|
|
// ---- Sub-processor parsing from library traces ----
|
|
|
|
/**
 * Frontmatter keys that are copied from a library decision file into its
 * sub-processor entry. Any other frontmatter key is not carried over.
 */
const TRACE_FIELDS = [
  "package",
  "version",
  "decision",
  "data-sent",
  "region",
  "dpa-signed",
  "sccs-required",
  "contact",
];
|
|
|
|
/**
 * Walk library decision files and return sub-processor entries where
 * is-sub-processor: true. Each entry gets source: "library-trace" injected.
 *
 * Only the keys listed in TRACE_FIELDS are copied from the frontmatter, and
 * only when present. The flag must be the boolean `true`; any other value
 * (including the string "true") excludes the file.
 *
 * A file that cannot be read is skipped, with a warning on stderr so the
 * omission is visible.
 *
 * @param {string} [repoRoot=REPO_ROOT] - Repository root to scan.
 * @returns {Array<Record<string, string | boolean | null>>} Entries in the
 *   order the files were discovered.
 */
export function parseLibraryTraceSubProcessors(repoRoot = REPO_ROOT) {
  const entries = [];

  for (const filePath of findLibraryDecisionFiles(repoRoot)) {
    let src;
    try {
      src = fs.readFileSync(filePath, "utf8");
    } catch (err) {
      // Best-effort: keep going, but say so — a silently skipped file could
      // hide a sub-processor from the generated inventory.
      process.stderr.write(
        `[compliance:sub-processors] warning: skipping unreadable file ${filePath}: ${err?.message ?? err}\n`,
      );
      continue;
    }

    const meta = parseFrontmatter(src);
    if (!meta || meta["is-sub-processor"] !== true) continue;

    const entry = { source: "library-trace" };
    for (const field of TRACE_FIELDS) {
      if (meta[field] !== undefined) entry[field] = meta[field];
    }
    entries.push(entry);
  }

  return entries;
}
|
|
|
|
// ---- Manual entries ----
|
|
|
|
/**
 * Parse a simple YAML list-of-objects: each item starts at column 0 with
 * "- key: value" (or a bare "-"), and continuation lines are indented with
 * spaces or tabs.
 *
 * This is deliberately not a general YAML parser: blank lines and lines whose
 * first non-blank character is `#` are skipped, keys without an inline value
 * are ignored, and nested structures are not supported. Indented lines that
 * appear before the first item are ignored.
 *
 * @param {string} src - YAML text.
 * @returns {Array<Record<string, string | boolean | null>>} Plain objects, one
 *   per list item, in document order.
 */
export function parseSimpleYamlList(src) {
  const entries = [];
  let current = null;

  // Store the "key: value" pair found in `text` on the current item. Splits on
  // the first colon only; pairs with an empty value are ignored.
  const assignField = (text) => {
    const colonIdx = text.indexOf(":");
    if (colonIdx === -1) return;
    const rawVal = text.slice(colonIdx + 1).trim();
    if (!rawVal) return;
    current[text.slice(0, colonIdx).trim()] = parseScalarValue(rawVal);
  };

  for (const line of src.split("\n")) {
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith("#")) continue;

    // A bare "-" also opens an item; otherwise the fields that follow it
    // would be written onto the previous item and overwrite its values.
    const startsItem =
      line.startsWith("- ") || (line.startsWith("-") && trimmed === "-");

    if (startsItem) {
      if (current) entries.push(current);
      current = {};
      assignField(line.slice(1));
    } else if (current && (line.startsWith(" ") || line.startsWith("\t"))) {
      assignField(line);
    }
  }

  if (current) entries.push(current);
  return entries;
}
|
|
|
|
/**
 * Load manual sub-processor entries from compliance/sub-processors.manual.yml.
 *
 * Returns an empty array if the file doesn't exist (graceful skip). Any other
 * read failure also yields an empty array, but is reported on stderr so that
 * manual entries do not vanish from the inventory unnoticed.
 *
 * Injects source: "manual" into each entry, overriding any `source` key the
 * file itself may contain.
 *
 * @param {string} [repoRoot=REPO_ROOT] - Directory MANUAL_PATH is resolved against.
 * @returns {Array<Record<string, string | boolean | null>>} Parsed entries.
 */
export function loadManualEntries(repoRoot = REPO_ROOT) {
  const manualPath = path.resolve(repoRoot, MANUAL_PATH);

  let src;
  try {
    // Read directly rather than checking for existence first, so there is no
    // window in which the file can disappear between the check and the read.
    src = fs.readFileSync(manualPath, "utf8");
  } catch (err) {
    if (err?.code !== "ENOENT") {
      process.stderr.write(
        `[compliance:sub-processors] warning: could not read ${manualPath}: ${err?.message ?? err}\n`,
      );
    }
    return [];
  }

  return parseSimpleYamlList(src).map((entry) => ({
    ...entry,
    source: "manual",
  }));
}
|
|
|
|
// ---- Builder ----
|
|
|
|
/**
 * Merge library-trace and manual sub-processor entries.
 * Sorts by package name for deterministic output.
 *
 * Entries without a `package` sort as the empty string (i.e. first). Entries
 * with equal package names keep their relative order, library-trace entries
 * before manual ones. Neither input array is modified.
 *
 * @param {object[]} traced - Entries derived from library decision files.
 * @param {object[]} manual - Entries loaded from the manual file.
 * @returns {object[]} A new array holding every entry, sorted by package name.
 */
export function buildSubProcessors(traced, manual) {
  const sortKey = (entry) => String(entry.package ?? "");

  // Pin the collation locale: with no locale argument localeCompare follows
  // the host's default locale, so the order could differ between machines.
  return [...traced, ...manual].sort((a, b) =>
    sortKey(a).localeCompare(sortKey(b), "en"),
  );
}
|
|
|
|
// ---- YAML serialization ----
|
|
|
|
/** Comment banner written at the top of the generated YAML (no trailing newline). */
const YAML_HEADER = [
  "# compliance/sub-processors.yml — Third-party sub-processor inventory",
  "# Generated by scripts/compliance/emit-sub-processors.mjs — do not edit manually.",
  "# Run `pnpm compliance:sub-processors` to regenerate.",
].join("\n");
|
|
|
|
/**
 * Quote a YAML string scalar only when necessary.
 *
 * A value is double-quoted (with `\` and `"` escaped) when it is empty, is one
 * of the listed boolean/null-like words, contains a character with special
 * meaning in YAML, has leading or trailing whitespace, or begins with a block
 * indicator (`-` or `?` followed by whitespace or end of string). Anything
 * else is returned unchanged.
 *
 * @param {unknown} s - Value to render; non-strings are converted with String().
 * @returns {string} YAML text for the scalar.
 */
function yamlStr(s) {
  if (typeof s !== "string") return String(s);

  const needsQuotes =
    s === "" ||
    ["true", "false", "null", "yes", "no", "on", "off"].includes(s) ||
    /[[{},:#&*!|>'"%@`\]]/u.test(s) ||
    /^\s|\s$/.test(s) ||
    // "- x" / "? x" (or a bare "-" / "?") after "key:" would be read as a
    // block sequence entry or complex key rather than as a plain string.
    /^[-?](?:\s|$)/.test(s);

  if (!needsQuotes) return s;
  return `"${s.replace(/\\/g, "\\\\").replace(/"/g, '\\"')}"`;
}
|
|
|
|
// Output order of the fields of each entry: package first (natural
// identifier), then the remaining fields alphabetically. Only the fields
// listed here are written to the generated YAML.
const ENTRY_FIELDS = [
  "package",
  "contact",
  "data-sent",
  "decision",
  "dpa-signed",
  "region",
  "sccs-required",
  "source",
  "version",
];
|
|
|
|
/**
 * Render sub-processors as deterministic YAML.
 *
 * Entries are written in the order given (sorting is done in
 * buildSubProcessors). Fields are rendered in a fixed order: package first,
 * rest alphabetical. Fields that are `undefined` on an entry, or not listed in
 * ENTRY_FIELDS, are omitted. Booleans and null are written as bare YAML
 * literals; every other value is converted to a string and quoted only when
 * necessary.
 *
 * @param {object[]} entries - Sub-processor entries to serialise.
 * @returns {string} YAML document text, ending with a newline.
 */
export function renderSubProcessorsYaml(entries) {
  const lines = [YAML_HEADER, "sub-processors:"];

  if (entries.length === 0) {
    lines.push("  []");
  }

  for (const entry of entries) {
    let isFirstField = true;
    for (const field of ENTRY_FIELDS) {
      const value = entry[field];
      if (value === undefined) continue;

      // null must stay a YAML null: passing String(null) through the quoting
      // helper would emit the quoted string "null" and change its type.
      const rendered =
        typeof value === "boolean" || value === null
          ? String(value)
          : yamlStr(String(value));

      // The first field carries the list marker; later fields are indented to
      // line up with the key after "- " so they belong to the same mapping.
      lines.push(`${isFirstField ? "  - " : "    "}${field}: ${rendered}`);
      isFirstField = false;
    }
  }

  return `${lines.join("\n")}\n`;
}
|
|
|
|
// ---- CLI ----
|
|
|
|
/**
 * Parse command-line arguments into the run mode.
 *
 * `--print` and `--check` select the mode (the last one given wins); with
 * neither, the mode is "write". `--help` / `-h` prints usage and exits with
 * status 0. Unrecognised arguments do not change the mode, but are reported
 * on stderr so that a mistyped flag is not mistaken for the default mode.
 *
 * @param {string[]} argv - Full argument vector; the first two elements are skipped.
 * @returns {{ mode: "write" | "print" | "check" }} The selected mode.
 */
function parseArgs(argv) {
  const out = { mode: "write" }; // 'write' | 'print' | 'check'

  for (const arg of argv.slice(2)) {
    switch (arg) {
      case "--print":
        out.mode = "print";
        break;
      case "--check":
        out.mode = "check";
        break;
      case "--help":
      case "-h":
        console.log(
          [
            "Usage: node scripts/compliance/emit-sub-processors.mjs [--print | --check]",
            " (default): write compliance/sub-processors.yml",
            " --print: write YAML to stdout",
            " --check: diff vs committed file; exit 1 on mismatch",
          ].join("\n"),
        );
        process.exit(0);
        break;
      case "--":
        // Bare separator, as some script runners forward it: nothing to do.
        break;
      default:
        process.stderr.write(
          `[compliance:sub-processors] warning: ignoring unknown argument ${arg}\n`,
        );
    }
  }

  return out;
}
|
|
|
|
/**
 * CLI entry point: build the inventory and, depending on the mode, print it,
 * compare it against the committed file, or write it to disk.
 *
 * All paths are resolved against the current working directory, so the script
 * is expected to be started from the repository root.
 *
 * Exit status in --check mode: 0 when the committed file matches the freshly
 * generated YAML, 1 when it differs or does not exist.
 */
function main() {
  const args = parseArgs(process.argv);
  const repoRoot = process.cwd();

  const traced = parseLibraryTraceSubProcessors(repoRoot);
  const manual = loadManualEntries(repoRoot);
  const yaml = renderSubProcessorsYaml(buildSubProcessors(traced, manual));

  const outPath = path.resolve(repoRoot, OUTPUT_PATH);

  if (args.mode === "print") {
    process.stdout.write(yaml);
    return;
  }

  if (args.mode === "check") {
    if (!fs.existsSync(outPath)) {
      process.stderr.write(
        `[compliance:sub-processors] --check: no committed file at ${OUTPUT_PATH}\n` +
          `Run \`pnpm compliance:sub-processors\` to generate it first.\n`,
      );
      process.exit(1);
    }

    const committed = fs.readFileSync(outPath, "utf8");
    const diff = unifiedDiff(committed, yaml, OUTPUT_PATH);
    // A null diff is treated as "no differences".
    if (diff === null) {
      console.log(`✓ compliance:sub-processors — ${OUTPUT_PATH} is up to date`);
      process.exit(0);
    }

    process.stderr.write(
      `✗ compliance:sub-processors — ${OUTPUT_PATH} is out of date:\n${diff}\n`,
    );
    process.exit(1);
  }

  // Default: write to file. With `recursive: true`, mkdirSync does not fail
  // when the directory already exists, so no prior existence check is needed.
  fs.mkdirSync(path.dirname(outPath), { recursive: true });
  fs.writeFileSync(outPath, yaml, "utf8");
  console.log(`✓ compliance:sub-processors — wrote ${OUTPUT_PATH}`);
}
|
|
|
|
// Run the CLI only when this file is the script Node was started with, not
// when it is imported as a module. The script path is converted with
// pathToFileURL so the comparison also holds for paths that need URL-encoding
// (spaces, non-ASCII characters) and for Windows drive paths, where
// concatenating "file://" and the path does not produce the module's URL.
if (
  process.argv[1] &&
  import.meta.url === pathToFileURL(process.argv[1]).href
) {
  main();
}
|