#!/usr/bin/env node
/**
 * emit-sub-processors.mjs — Third-party sub-processor inventory emitter.
 *
 * Walks docs/library-decisions/*.md, filters entries where is-sub-processor: true,
 * merges compliance/sub-processors.manual.yml (if present) with source: manual flag,
 * and emits a sorted deterministic YAML inventory at compliance/sub-processors.yml.
 *
 * Usage:
 *   node scripts/compliance/emit-sub-processors.mjs           # write compliance/sub-processors.yml
 *   node scripts/compliance/emit-sub-processors.mjs --print   # write to stdout
 *   node scripts/compliance/emit-sub-processors.mjs --check   # diff vs committed file; exit 1 on mismatch
 */

import fs from "node:fs";
import path from "node:path";
import { unifiedDiff, REPO_ROOT } from "./emit-data-map.mjs";

export { unifiedDiff, REPO_ROOT };

/** Repo-relative path of the generated inventory. */
export const OUTPUT_PATH = "compliance/sub-processors.yml";
/** Repo-relative path of the optional hand-maintained entries file. */
export const MANUAL_PATH = "compliance/sub-processors.manual.yml";

// ---- Frontmatter parser ----

/**
 * Parse a YAML scalar value string into its JS equivalent.
 *
 * Only the literals `true`, `false` and `null` are converted; a value wrapped
 * in a matching pair of single or double quotes has the quotes removed
 * (escape sequences inside are NOT interpreted). Everything else — including
 * numbers — is returned unchanged as a string.
 *
 * @param {string} raw - Already-trimmed text found after the `key:` separator.
 * @returns {string | boolean | null} The converted value.
 */
function parseScalarValue(raw) {
  if (raw === "true") return true;
  if (raw === "false") return false;
  if (raw === "null") return null;

  const quote = raw[0];
  // Require at least two characters so a lone quote character is not treated
  // as an (empty) quoted string.
  const isQuoted =
    raw.length >= 2 && (quote === '"' || quote === "'") && raw.endsWith(quote);
  return isQuoted ? raw.slice(1, -1) : raw;
}
/**
 * Remove a trailing YAML comment (` # ...`) from an already-trimmed value.
 *
 * A `#` only starts a comment when it is preceded by whitespace, so URL
 * fragments such as `https://x.example/#a` are preserved. For a value that
 * starts with a quote, only text after the closing quote is considered.
 *
 * @param {string} rawValue - Trimmed text found after `key:`.
 * @returns {string} The value without its comment ("" for a comment-only value).
 */
function stripInlineComment(rawValue) {
  if (rawValue.startsWith("#")) return "";

  const quote = rawValue[0];
  if (quote === '"' || quote === "'") {
    const closeIdx = rawValue.indexOf(quote, 1);
    const tail = closeIdx === -1 ? "" : rawValue.slice(closeIdx + 1);
    return /^\s+#/.test(tail) ? rawValue.slice(0, closeIdx + 1) : rawValue;
  }
  return rawValue.replace(/\s+#.*$/, "");
}

/**
 * Parse top-level scalar fields from YAML frontmatter in a markdown file.
 * Returns an object of key → value pairs, or null if no frontmatter is found.
 * Skips comment lines, empty lines, and indented lines (nested block fields).
 * Trailing ` # comment` text after a value is ignored, so
 * `is-sub-processor: true  # note` still yields the boolean `true`.
 *
 * @param {string} src - Full text of the markdown file.
 * @returns {Record<string, string | boolean | null> | null} Parsed fields, or
 *   null when the text does not start with a `---` delimited block.
 */
export function parseFrontmatter(src) {
  const match = /^---\r?\n([\s\S]*?)\r?\n---/.exec(src);
  if (!match) return null;

  const result = {};
  for (const line of match[1].split(/\r?\n/)) {
    if (!line.trim() || line.startsWith("#")) continue;
    // Indented lines belong to a nested block, which this parser ignores.
    if (line.startsWith(" ") || line.startsWith("\t")) continue;

    const colonIdx = line.indexOf(":");
    if (colonIdx === -1) continue;

    const key = line.slice(0, colonIdx).trim();
    const rawValue = stripInlineComment(line.slice(colonIdx + 1).trim());
    if (!rawValue) continue; // block header like "filter-results:"

    result[key] = parseScalarValue(rawValue);
  }
  return result;
}

// ---- Library decision file discovery ----

/**
 * Find all library decision markdown files under docs/library-decisions/.
 * Excludes _template.md and any file starting with `_`.
 *
 * @param {string} [repoRoot=REPO_ROOT] - Directory that contains `docs/`.
 * @returns {string[]} Paths (repoRoot joined with the file name) sorted by
 *   file name; empty when the directory does not exist.
 */
export function findLibraryDecisionFiles(repoRoot = REPO_ROOT) {
  const dir = path.join(repoRoot, "docs", "library-decisions");
  if (!fs.existsSync(dir)) return [];

  return fs
    .readdirSync(dir)
    .sort()
    .filter((f) => f.endsWith(".md") && !f.startsWith("_"))
    .map((f) => path.join(dir, f));
}

// ---- Sub-processor parsing from library traces ----

/** Frontmatter keys copied from a library decision file into its inventory entry. */
const TRACE_FIELDS = [
  "package",
  "version",
  "decision",
  "data-sent",
  "region",
  "dpa-signed",
  "sccs-required",
  "contact",
];
/**
 * Walk library decision files and return sub-processor entries where
 * is-sub-processor: true. Each entry gets source: "library-trace" injected.
 *
 * A file that cannot be read is skipped, but a warning is written to stderr
 * so a vendor cannot silently disappear from the inventory.
 *
 * @param {string} [repoRoot=REPO_ROOT] - Directory that contains `docs/`.
 * @returns {Array<Record<string, string | boolean | null>>} One entry per
 *   matching file, in the order the files are discovered.
 */
export function parseLibraryTraceSubProcessors(repoRoot = REPO_ROOT) {
  const entries = [];

  for (const filePath of findLibraryDecisionFiles(repoRoot)) {
    let src;
    try {
      src = fs.readFileSync(filePath, "utf8");
    } catch (err) {
      process.stderr.write(
        `[compliance:sub-processors] warning: skipping unreadable ${filePath}: ${err?.message ?? err}\n`,
      );
      continue;
    }

    const meta = parseFrontmatter(src);
    // Strict comparison: only the boolean literal opts a library in.
    if (!meta || meta["is-sub-processor"] !== true) continue;

    const entry = { source: "library-trace" };
    for (const field of TRACE_FIELDS) {
      if (meta[field] !== undefined) entry[field] = meta[field];
    }
    entries.push(entry);
  }

  return entries;
}

// ---- Manual entries ----

/**
 * Parse a simple YAML list-of-objects: each item starts with "- key: value"
 * and continuation lines are indented deeper than the item's "-" marker.
 *
 * The list may sit at column 0 or be indented under a parent key (for example
 * `sub-processors:`); the column of the first item seen fixes the list level.
 * Blank lines, comment lines and lines outside an item are ignored. Keys with
 * an empty value are skipped.
 *
 * @param {string} src - YAML text.
 * @returns {Array<Record<string, string | boolean | null>>} Plain objects, one
 *   per list item, in document order.
 */
export function parseSimpleYamlList(src) {
  const entries = [];
  let current = null;
  let itemIndent = -1; // column of the list's "-" markers; -1 until the first item

  // Store `key: value` from `text` on `target` when the value is non-empty.
  const assignField = (target, text) => {
    const colonIdx = text.indexOf(":");
    if (colonIdx === -1) return;
    const key = text.slice(0, colonIdx).trim();
    const rawVal = text.slice(colonIdx + 1).trim();
    if (rawVal) target[key] = parseScalarValue(rawVal);
  };

  for (const line of src.split(/\r?\n/)) {
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith("#")) continue;

    const indent = line.length - line.trimStart().length;
    const looksLikeItem = trimmed === "-" || trimmed.startsWith("- ");

    if (looksLikeItem && (itemIndent === -1 || indent === itemIndent)) {
      itemIndent = indent;
      if (current) entries.push(current);
      current = {};
      assignField(current, trimmed.slice(1));
    } else if (current && indent > itemIndent) {
      assignField(current, line);
    }
  }

  if (current) entries.push(current);
  return entries;
}
/**
 * Load manual sub-processor entries from compliance/sub-processors.manual.yml.
 * Returns empty array if the file doesn't exist (graceful skip).
 * Injects source: "manual" into each entry.
 *
 * Any other read failure also yields an empty array, but a warning is written
 * to stderr so hand-maintained vendors are not dropped without notice.
 *
 * @param {string} [repoRoot=REPO_ROOT] - Directory MANUAL_PATH is resolved against.
 * @returns {Array<Record<string, string | boolean | null>>} Parsed entries.
 */
export function loadManualEntries(repoRoot = REPO_ROOT) {
  const manualPath = path.resolve(repoRoot, MANUAL_PATH);

  let src;
  try {
    src = fs.readFileSync(manualPath, "utf8");
  } catch (err) {
    if (err?.code !== "ENOENT") {
      process.stderr.write(
        `[compliance:sub-processors] warning: could not read ${MANUAL_PATH}: ${err?.message ?? err}\n`,
      );
    }
    return [];
  }

  return parseSimpleYamlList(src).map((entry) => ({
    ...entry,
    source: "manual",
  }));
}

// ---- Builder ----

/**
 * Merge library-trace and manual sub-processor entries.
 * Sorts by package name for deterministic output.
 *
 * The collation locale is pinned so the order does not depend on the locale
 * of the machine running the script. Entries without a package sort as "".
 * The sort is stable, so a traced entry precedes a manual one with the same
 * package. Neither input array is modified.
 *
 * @param {object[]} traced - Entries from library decision files.
 * @param {object[]} manual - Entries from the manual file.
 * @returns {object[]} A new, sorted array holding all entries.
 */
export function buildSubProcessors(traced, manual) {
  const collator = new Intl.Collator("en");
  const all = [...traced, ...manual];
  all.sort((a, b) =>
    collator.compare(String(a.package ?? ""), String(b.package ?? "")),
  );
  return all;
}

// ---- YAML serialization ----

const YAML_HEADER = [
  "# compliance/sub-processors.yml — Third-party sub-processor inventory",
  "# Generated by scripts/compliance/emit-sub-processors.mjs — do not edit manually.",
  "# Run `pnpm compliance:sub-processors` to regenerate.",
].join("\n");

/** Plain words a YAML loader may resolve to a boolean or null (compared lower-cased). */
const YAML_RESERVED_WORDS = new Set([
  "true",
  "false",
  "null",
  "yes",
  "no",
  "on",
  "off",
  "~",
]);

/** Text a YAML loader would resolve to a number when left unquoted. */
const YAML_NUMBER_LIKE =
  /^(?:[-+]?(?:\d[\d_]*(?:\.\d*)?|\.\d+)(?:e[-+]?\d+)?|0x[\da-f]+|0o[0-7]+|[-+]?\.inf|\.nan)$/i;

/** Short escapes used inside double-quoted scalars; other control chars use \xNN. */
const YAML_CONTROL_ESCAPES = new Map([
  ["\n", "\\n"],
  ["\r", "\\r"],
  ["\t", "\\t"],
]);

/**
 * Quote a YAML string scalar only when necessary.
 *
 * A string is double-quoted when emitting it plain would change its meaning:
 * it is empty, reads as a boolean/null word (any letter case) or as a number,
 * contains a YAML indicator character, starts with a `- ` / `? ` marker, has
 * leading or trailing whitespace, or contains a control character. Inside the
 * quotes, backslashes, double quotes and control characters are escaped.
 *
 * @param {*} s - Value to render; non-strings are returned via String(s).
 * @returns {string} YAML text for the scalar.
 */
function yamlStr(s) {
  if (typeof s !== "string") return String(s);

  const needsQuoting =
    s === "" ||
    YAML_RESERVED_WORDS.has(s.toLowerCase()) ||
    YAML_NUMBER_LIKE.test(s) ||
    /[[{},:#&*!|>'"%@`\]]/u.test(s) ||
    /^[-?](?:\s|$)/.test(s) ||
    /^\s|\s$/.test(s) ||
    /[\u0000-\u001f\u007f]/.test(s);
  if (!needsQuoting) return s;

  // Backslashes first, so the escapes added afterwards are not doubled.
  const escaped = s
    .replace(/\\/g, "\\\\")
    .replace(/"/g, '\\"')
    .replace(
      /[\u0000-\u001f\u007f]/g,
      (ch) =>
        YAML_CONTROL_ESCAPES.get(ch) ??
        `\\x${ch.charCodeAt(0).toString(16).padStart(2, "0")}`,
    );
  return `"${escaped}"`;
}

// package first (natural identifier), then remaining fields alphabetically
const ENTRY_FIELDS = [
  "package",
  "contact",
  "data-sent",
  "decision",
  "dpa-signed",
  "region",
  "sccs-required",
  "source",
  "version",
];
/**
 * Render sub-processors as deterministic YAML.
 * Entries sorted by package name (done in buildSubProcessors).
 * Fields rendered in a fixed order: package first, rest alphabetical.
 *
 * Each entry is a list item under `sub-processors:`; the first rendered field
 * carries the `- ` marker and the remaining fields are aligned beneath it so
 * they belong to the same mapping. Booleans, numbers and null are written
 * bare; every other value goes through yamlStr. Undefined fields are omitted.
 *
 * @param {object[]} entries - Entries to render, already sorted.
 * @returns {string} The YAML document, ending with a newline.
 */
export function renderSubProcessorsYaml(entries) {
  const lines = [YAML_HEADER, "sub-processors:"];

  if (entries.length === 0) {
    lines.push("  []");
  }

  for (const entry of entries) {
    let isFirstField = true;
    for (const field of ENTRY_FIELDS) {
      const value = entry[field];
      if (value === undefined) continue;

      const isBareScalar =
        value === null ||
        typeof value === "boolean" ||
        typeof value === "number";
      const rendered = isBareScalar ? String(value) : yamlStr(String(value));

      // Keys after the first must sit under the first key, not under the
      // dash, otherwise they are not part of the list item's mapping.
      lines.push(`${isFirstField ? "  - " : "    "}${field}: ${rendered}`);
      isFirstField = false;
    }
  }

  return `${lines.join("\n")}\n`;
}

// ---- CLI ----

/**
 * Parse command-line flags.
 *
 * `--print` and `--check` select the mode (the last one given wins);
 * `--help` / `-h` prints usage and exits with status 0. Any other argument
 * is ignored, with a warning on stderr so a mistyped flag is noticed.
 *
 * @param {string[]} argv - Full process.argv; parsing starts at index 2.
 * @returns {{ mode: "write" | "print" | "check" }} Selected mode.
 */
function parseArgs(argv) {
  const out = { mode: "write" }; // 'write' | 'print' | 'check'

  for (const arg of argv.slice(2)) {
    if (arg === "--print") {
      out.mode = "print";
    } else if (arg === "--check") {
      out.mode = "check";
    } else if (arg === "--help" || arg === "-h") {
      console.log(
        [
          "Usage: node scripts/compliance/emit-sub-processors.mjs [--print | --check]",
          " (default): write compliance/sub-processors.yml",
          " --print: write YAML to stdout",
          " --check: diff vs committed file; exit 1 on mismatch",
        ].join("\n"),
      );
      process.exit(0);
    } else {
      process.stderr.write(
        `[compliance:sub-processors] warning: ignoring unknown argument ${arg}\n`,
      );
    }
  }

  return out;
}

/**
 * CLI entry point: build the inventory from the current working directory and
 * print it, compare it with the committed file, or write it to OUTPUT_PATH,
 * depending on the selected mode.
 *
 * Failures in --check mode set process.exitCode to 1 and return instead of
 * calling process.exit(), so pending stdout/stderr output can be flushed.
 */
function main() {
  const args = parseArgs(process.argv);
  const repoRoot = process.cwd();

  const traced = parseLibraryTraceSubProcessors(repoRoot);
  const manual = loadManualEntries(repoRoot);
  const yaml = renderSubProcessorsYaml(buildSubProcessors(traced, manual));
  const outPath = path.resolve(repoRoot, OUTPUT_PATH);

  if (args.mode === "print") {
    process.stdout.write(yaml);
    return;
  }

  if (args.mode === "check") {
    if (!fs.existsSync(outPath)) {
      process.stderr.write(
        `[compliance:sub-processors] --check: no committed file at ${OUTPUT_PATH}\n` +
          `Run \`pnpm compliance:sub-processors\` to generate it first.\n`,
      );
      process.exitCode = 1;
      return;
    }

    const committed = fs.readFileSync(outPath, "utf8");
    const diff = unifiedDiff(committed, yaml, OUTPUT_PATH);
    if (diff === null) {
      console.log(`✓ compliance:sub-processors — ${OUTPUT_PATH} is up to date`);
      return;
    }

    process.stderr.write(
      `✗ compliance:sub-processors — ${OUTPUT_PATH} is out of date:\n${diff}\n`,
    );
    process.exitCode = 1;
    return;
  }

  // Default: write to file. `recursive: true` makes mkdir a no-op when the
  // directory already exists, so no separate existence check is needed.
  fs.mkdirSync(path.dirname(outPath), { recursive: true });
  fs.writeFileSync(outPath, yaml, "utf8");
  console.log(`✓ compliance:sub-processors — wrote ${OUTPUT_PATH}`);
}

/**
 * Report whether this module is the script Node was started with.
 *
 * import.meta.url is a normalised file URL, so the script path is normalised
 * through URL as well before comparing; a raw string comparison fails for
 * paths containing characters that get percent-encoded (for example spaces).
 *
 * @returns {boolean} True when process.argv[1] refers to this module.
 */
function isMainModule() {
  const scriptPath = process.argv[1];
  if (!scriptPath) return false;
  try {
    return import.meta.url === new URL(`file://${scriptPath}`).href;
  } catch {
    // A path that cannot form a valid URL cannot be this module.
    return false;
  }
}

if (isMainModule()) {
  main();
}