diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index cadd959..4ca30c7 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -65,6 +65,6 @@ jobs: with: go-version: "1.23.2" - name: Validate contributors - run: go build ./scripts/contributors && ./contributors + run: go build ./cmd/readmevalidation && ./readmevalidation - name: Remove build file artifact - run: rm ./contributors + run: rm ./readmevalidation diff --git a/.gitignore b/.gitignore index 6ee570e..157c642 100644 --- a/.gitignore +++ b/.gitignore @@ -135,8 +135,8 @@ dist .yarn/install-state.gz .pnp.* -# Script output -/contributors +# Things needed for CI +/readmevalidation # Terraform files generated during testing .terraform* diff --git a/cmd/readmevalidation/contributors.go b/cmd/readmevalidation/contributors.go new file mode 100644 index 0000000..89a2b1a --- /dev/null +++ b/cmd/readmevalidation/contributors.go @@ -0,0 +1,340 @@ +package main + +import ( + "errors" + "fmt" + "log" + "net/url" + "os" + "path" + "slices" + "strings" + + "gopkg.in/yaml.v3" +) + +var validContributorStatuses = []string{"official", "partner", "community"} + +type contributorProfileFrontmatter struct { + DisplayName string `yaml:"display_name"` + Bio string `yaml:"bio"` + // Script assumes that if value is nil, the Registry site build step will + // backfill the value with the user's GitHub avatar URL + AvatarURL *string `yaml:"avatar"` + LinkedinURL *string `yaml:"linkedin"` + WebsiteURL *string `yaml:"website"` + SupportEmail *string `yaml:"support_email"` + ContributorStatus *string `yaml:"status"` +} + +type contributorProfile struct { + frontmatter contributorProfileFrontmatter + namespace string + filePath string +} + +func validateContributorDisplayName(displayName string) error { + if displayName == "" { + return fmt.Errorf("missing display_name") + } + + return nil +} + +func validateContributorLinkedinURL(linkedinURL *string) error { + if linkedinURL == nil { + return nil + } + + if _, err := url.ParseRequestURI(*linkedinURL); err != nil { + return fmt.Errorf("linkedIn URL %q is not valid: %v", *linkedinURL, err) + } + + return nil +} + +func validateContributorSupportEmail(email *string) []error { + if email == nil { + return nil + } + + errs := []error{} + + // Can't 100% validate that this is correct without actually sending + // an email, and especially with some contributors being individual + // developers, we don't want to do that on every single run of the CI + // pipeline. Best we can do is verify the general structure + username, server, ok := strings.Cut(*email, "@") + if !ok { + errs = append(errs, fmt.Errorf("email address %q is missing @ symbol", *email)) + return errs + } + + if username == "" { + errs = append(errs, fmt.Errorf("email address %q is missing username", *email)) + } + + domain, tld, ok := strings.Cut(server, ".") + if !ok { + errs = append(errs, fmt.Errorf("email address %q is missing period for server segment", *email)) + return errs + } + + if domain == "" { + errs = append(errs, fmt.Errorf("email address %q is missing domain", *email)) + } + if tld == "" { + errs = append(errs, fmt.Errorf("email address %q is missing top-level domain", *email)) + } + if strings.Contains(*email, "?") { + errs = append(errs, errors.New("email is not allowed to contain query parameters")) + } + + return errs +} + +func validateContributorWebsite(websiteURL *string) error { + if websiteURL == nil { + return nil + } + + if _, err := url.ParseRequestURI(*websiteURL); err != nil { + return fmt.Errorf("linkedIn URL %q is not valid: %v", *websiteURL, err) + } + + return nil +} + +func validateContributorStatus(status *string) error { + if status == nil { + return nil + } + + if !slices.Contains(validContributorStatuses, *status) { + return fmt.Errorf("contributor status %q is not valid", *status) + } + + return nil +} + +// Can't validate the image actually leads to a valid resource in a pure +// function, but can at least catch obvious problems +func validateContributorAvatarURL(avatarURL *string) []error { + if avatarURL == nil { + return nil + } + + errs := []error{} + if *avatarURL == "" { + errs = append(errs, errors.New("avatar URL must be omitted or non-empty string")) + return errs + } + + // Have to use .Parse instead of .ParseRequestURI because this is the + // one field that's allowed to be a relative URL + if _, err := url.Parse(*avatarURL); err != nil { + errs = append(errs, fmt.Errorf("URL %q is not a valid relative or absolute URL", *avatarURL)) + } + if strings.Contains(*avatarURL, "?") { + errs = append(errs, errors.New("avatar URL is not allowed to contain search parameters")) + } + + matched := false + for _, ff := range supportedAvatarFileFormats { + matched = strings.HasSuffix(*avatarURL, ff) + if matched { + break + } + } + if !matched { + segments := strings.Split(*avatarURL, ".") + fileExtension := segments[len(segments)-1] + errs = append(errs, fmt.Errorf("avatar URL '.%s' does not end in a supported file format: [%s]", fileExtension, strings.Join(supportedAvatarFileFormats, ", "))) + } + + return errs +} + +func validateContributorYaml(yml contributorProfile) []error { + allErrs := []error{} + + if err := validateContributorDisplayName(yml.frontmatter.DisplayName); err != nil { + allErrs = append(allErrs, addFilePathToError(yml.filePath, err)) + } + if err := validateContributorLinkedinURL(yml.frontmatter.LinkedinURL); err != nil { + allErrs = append(allErrs, addFilePathToError(yml.filePath, err)) + } + if err := validateContributorWebsite(yml.frontmatter.WebsiteURL); err != nil { + allErrs = append(allErrs, addFilePathToError(yml.filePath, err)) + } + if err := validateContributorStatus(yml.frontmatter.ContributorStatus); err != nil { + allErrs = append(allErrs, addFilePathToError(yml.filePath, err)) + } + + for _, err := range validateContributorSupportEmail(yml.frontmatter.SupportEmail) { + allErrs = append(allErrs, addFilePathToError(yml.filePath, err)) + } + for _, err := range validateContributorAvatarURL(yml.frontmatter.AvatarURL) { + allErrs = append(allErrs, addFilePathToError(yml.filePath, err)) + } + + return allErrs +} + +func parseContributorProfile(rm readme) (contributorProfile, error) { + fm, _, err := separateFrontmatter(rm.rawText) + if err != nil { + return contributorProfile{}, fmt.Errorf("%q: failed to parse frontmatter: %v", rm.filePath, err) + } + + yml := contributorProfileFrontmatter{} + if err := yaml.Unmarshal([]byte(fm), &yml); err != nil { + return contributorProfile{}, fmt.Errorf("%q: failed to parse: %v", rm.filePath, err) + } + + return contributorProfile{ + filePath: rm.filePath, + frontmatter: yml, + namespace: strings.TrimSuffix(strings.TrimPrefix(rm.filePath, "registry/"), "/README.md"), + }, nil +} + +func parseContributorFiles(readmeEntries []readme) (map[string]contributorProfile, error) { + profilesByNamespace := map[string]contributorProfile{} + yamlParsingErrors := []error{} + for _, rm := range readmeEntries { + p, err := parseContributorProfile(rm) + if err != nil { + yamlParsingErrors = append(yamlParsingErrors, err) + continue + } + + if prev, alreadyExists := profilesByNamespace[p.namespace]; alreadyExists { + yamlParsingErrors = append(yamlParsingErrors, fmt.Errorf("%q: namespace %q conflicts with namespace from %q", p.filePath, p.namespace, prev.filePath)) + continue + } + profilesByNamespace[p.namespace] = p + } + if len(yamlParsingErrors) != 0 { + return nil, validationPhaseError{ + phase: validationPhaseReadmeParsing, + errors: yamlParsingErrors, + } + } + + yamlValidationErrors := []error{} + for _, p := range profilesByNamespace { + errors := validateContributorYaml(p) + if len(errors) > 0 { + yamlValidationErrors = append(yamlValidationErrors, errors...) + continue + } + } + if len(yamlValidationErrors) != 0 { + return nil, validationPhaseError{ + phase: validationPhaseReadmeParsing, + errors: yamlValidationErrors, + } + } + + return profilesByNamespace, nil +} + +func aggregateContributorReadmeFiles() ([]readme, error) { + dirEntries, err := os.ReadDir(rootRegistryPath) + if err != nil { + return nil, err + } + + allReadmeFiles := []readme{} + errs := []error{} + for _, e := range dirEntries { + dirPath := path.Join(rootRegistryPath, e.Name()) + if !e.IsDir() { + continue + } + + readmePath := path.Join(dirPath, "README.md") + rmBytes, err := os.ReadFile(readmePath) + if err != nil { + errs = append(errs, err) + continue + } + allReadmeFiles = append(allReadmeFiles, readme{ + filePath: readmePath, + rawText: string(rmBytes), + }) + } + + if len(errs) != 0 { + return nil, validationPhaseError{ + phase: validationPhaseFileLoad, + errors: errs, + } + } + + return allReadmeFiles, nil +} + +func validateContributorRelativeUrls(contributors map[string]contributorProfile) error { + // This function only validates relative avatar URLs for now, but it can be + // beefed up to validate more in the future + errs := []error{} + + for _, con := range contributors { + // If the avatar URL is missing, we'll just assume that the Registry + // site build step will take care of filling in the data properly + if con.frontmatter.AvatarURL == nil { + continue + } + + isRelativeURL := strings.HasPrefix(*con.frontmatter.AvatarURL, ".") || + strings.HasPrefix(*con.frontmatter.AvatarURL, "/") + if !isRelativeURL { + continue + } + + if strings.HasPrefix(*con.frontmatter.AvatarURL, "..") { + errs = append(errs, fmt.Errorf("%q: relative avatar URLs cannot be placed outside a user's namespaced directory", con.filePath)) + continue + } + + absolutePath := strings.TrimSuffix(con.filePath, "README.md") + + *con.frontmatter.AvatarURL + _, err := os.ReadFile(absolutePath) + if err != nil { + errs = append(errs, fmt.Errorf("%q: relative avatar path %q does not point to image in file system", con.filePath, *con.frontmatter.AvatarURL)) + } + } + + if len(errs) == 0 { + return nil + } + return validationPhaseError{ + phase: validationPhaseAssetCrossReference, + errors: errs, + } +} + +func validateAllContributorFiles() error { + allReadmeFiles, err := aggregateContributorReadmeFiles() + if err != nil { + return err + } + + log.Printf("Processing %d README files\n", len(allReadmeFiles)) + contributors, err := parseContributorFiles(allReadmeFiles) + if err != nil { + return err + } + log.Printf("Processed %d README files as valid contributor profiles", len(contributors)) + + err = validateContributorRelativeUrls(contributors) + if err != nil { + return err + } + log.Println("All relative URLs for READMEs are valid") + + log.Printf("Processed all READMEs in the %q directory\n", rootRegistryPath) + return nil +} diff --git a/cmd/readmevalidation/errors.go b/cmd/readmevalidation/errors.go new file mode 100644 index 0000000..db13edc --- /dev/null +++ b/cmd/readmevalidation/errors.go @@ -0,0 +1,28 @@ +package main + +import "fmt" + +// validationPhaseError represents an error that occurred during a specific +// phase of README validation. It should be used to collect ALL validation +// errors that happened during a specific phase, rather than the first one +// encountered. +type validationPhaseError struct { + phase validationPhase + errors []error +} + +var _ error = validationPhaseError{} + +func (vpe validationPhaseError) Error() string { + msg := fmt.Sprintf("Error during %q phase of README validation:", vpe.phase.String()) + for _, e := range vpe.errors { + msg += fmt.Sprintf("\n- %v", e) + } + msg += "\n" + + return msg +} + +func addFilePathToError(filePath string, err error) error { + return fmt.Errorf("%q: %v", filePath, err) +} diff --git a/cmd/readmevalidation/main.go b/cmd/readmevalidation/main.go new file mode 100644 index 0000000..2c0f452 --- /dev/null +++ b/cmd/readmevalidation/main.go @@ -0,0 +1,39 @@ +// This package is for validating all contributors within the main Registry +// directory. It validates that it has nothing but sub-directories, and that +// each sub-directory has a README.md file. Each of those files must then +// describe a specific contributor. The contents of these files will be parsed +// by the Registry site build step, to be displayed in the Registry site's UI. +package main + +import ( + "fmt" + "log" + "os" +) + +func main() { + log.Println("Starting README validation") + + // If there are fundamental problems with how the repo is structured, we + // can't make any guarantees that any further validations will be relevant + // or accurate + repoErr := validateRepoStructure() + if repoErr != nil { + log.Println(repoErr) + os.Exit(1) + } + + errs := []error{} + err := validateAllContributorFiles() + if err != nil { + errs = append(errs, err) + } + + if len(errs) == 0 { + os.Exit(0) + } + for _, err := range errs { + fmt.Println(err) + } + os.Exit(1) +} diff --git a/cmd/readmevalidation/readmefiles.go b/cmd/readmevalidation/readmefiles.go new file mode 100644 index 0000000..69ccf9f --- /dev/null +++ b/cmd/readmevalidation/readmefiles.go @@ -0,0 +1,113 @@ +package main + +import ( + "bufio" + "errors" + "fmt" + "strings" +) + +const rootRegistryPath = "./registry" + +var supportedAvatarFileFormats = []string{".png", ".jpeg", ".jpg", ".gif", ".svg"} + +// readme represents a single README file within the repo (usually within the +// top-level "/registry" directory). +type readme struct { + filePath string + rawText string +} + +// separateFrontmatter attempts to separate a README file's frontmatter content +// from the main README body, returning both values in that order. It does not +// validate whether the structure of the frontmatter is valid (i.e., that it's +// structured as YAML). +func separateFrontmatter(readmeText string) (string, string, error) { + if readmeText == "" { + return "", "", errors.New("README is empty") + } + + const fence = "---" + fm := "" + body := "" + fenceCount := 0 + lineScanner := bufio.NewScanner( + strings.NewReader(strings.TrimSpace(readmeText)), + ) + for lineScanner.Scan() { + nextLine := lineScanner.Text() + if fenceCount < 2 && nextLine == fence { + fenceCount++ + continue + } + // Break early if the very first line wasn't a fence, because then we + // know for certain that the README has problems + if fenceCount == 0 { + break + } + + // It should be safe to trim each line of the frontmatter on a per-line + // basis, because there shouldn't be any extra meaning attached to the + // indentation. The same does NOT apply to the README; best we can do is + // gather all the lines, and then trim around it + if inReadmeBody := fenceCount >= 2; inReadmeBody { + body += nextLine + "\n" + } else { + fm += strings.TrimSpace(nextLine) + "\n" + } + } + if fenceCount < 2 { + return "", "", errors.New("README does not have two sets of frontmatter fences") + } + if fm == "" { + return "", "", errors.New("readme has frontmatter fences but no frontmatter content") + } + + return fm, strings.TrimSpace(body), nil +} + +// validationPhase represents a specific phase during README validation. It is +// expected that each phase is discrete, and errors during one will prevent a +// future phase from starting. +type validationPhase int + +const ( + // validationPhaseFileStructureValidation indicates when the entire Registry + // directory is being verified for having all files be placed in the file + // system as expected. + validationPhaseFileStructureValidation validationPhase = iota + + // validationPhaseFileLoad indicates when README files are being read from + // the file system + validationPhaseFileLoad + + // validationPhaseReadmeParsing indicates when a README's frontmatter is + // being parsed as YAML. This phase does not include YAML validation. + validationPhaseReadmeParsing + + // validationPhaseReadmeValidation indicates when a README's frontmatter is + // being validated as proper YAML with expected keys. + validationPhaseReadmeValidation + + // validationPhaseAssetCrossReference indicates when a README's frontmatter + // is having all its relative URLs be validated for whether they point to + // valid resources. + validationPhaseAssetCrossReference +) + +func (p validationPhase) String() string { + switch p { + case validationPhaseFileStructureValidation: + return "File structure validation" + case validationPhaseFileLoad: + return "Filesystem reading" + case validationPhaseReadmeParsing: + return "README parsing" + case validationPhaseReadmeValidation: + return "README validation" + case validationPhaseAssetCrossReference: + return "Cross-referencing relative asset URLs" + default: + return fmt.Sprintf("Unknown validation phase: %d", p) + } +} diff --git a/cmd/readmevalidation/repostructure.go b/cmd/readmevalidation/repostructure.go new file mode 100644 index 0000000..164547f --- /dev/null +++ b/cmd/readmevalidation/repostructure.go @@ -0,0 +1,145 @@ +package main + +import ( + "errors" + "fmt" + "os" + "path" + "slices" + "strings" +) + +var ( + supportedResourceTypes = []string{"modules", "templates"} + supportedUserNameSpaceDirectories = append(supportedResourceTypes[:], ".icons", ".images") +) + +func validateCoderResourceSubdirectory(dirPath string) []error { + errs := []error{} + + subDir, err := os.Stat(dirPath) + if err != nil { + // It's valid for a specific resource directory not to exist. It's just + // that if it does exist, it must follow specific rules + if !errors.Is(err, os.ErrNotExist) { + errs = append(errs, addFilePathToError(dirPath, err)) + } + return errs + } + + if !subDir.IsDir() { + errs = append(errs, fmt.Errorf("%q: path is not a directory", dirPath)) + return errs + } + + files, err := os.ReadDir(dirPath) + if err != nil { + errs = append(errs, addFilePathToError(dirPath, err)) + return errs + } + for _, f := range files { + // The .coder subdirectories are sometimes generated as part of Bun + // tests. These subdirectories will never be committed to the repo, but + // in the off chance that they don't get cleaned up properly, we want to + // skip over them + if !f.IsDir() || f.Name() == ".coder" { + continue + } + + resourceReadmePath := path.Join(dirPath, f.Name(), "README.md") + _, err := os.Stat(resourceReadmePath) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + errs = append(errs, fmt.Errorf("%q: 'README.md' does not exist", resourceReadmePath)) + } else { + errs = append(errs, addFilePathToError(resourceReadmePath, err)) + } + } + + mainTerraformPath := path.Join(dirPath, f.Name(), "main.tf") + _, err = os.Stat(mainTerraformPath) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + errs = append(errs, fmt.Errorf("%q: 'main.tf' file does not exist", mainTerraformPath)) + } else { + errs = append(errs, addFilePathToError(mainTerraformPath, err)) + } + } + + } + + return errs +} + +func validateRegistryDirectory() []error { + userDirs, err := os.ReadDir(rootRegistryPath) + if err != nil { + return []error{err} + } + + allErrs := []error{} + for _, d := range userDirs { + dirPath := path.Join(rootRegistryPath, d.Name()) + if !d.IsDir() { + allErrs = append(allErrs, fmt.Errorf("detected non-directory file %q at base of main Registry directory", dirPath)) + continue + } + + contributorReadmePath := path.Join(dirPath, "README.md") + _, err := os.Stat(contributorReadmePath) + if err != nil { + allErrs = append(allErrs, err) + } + + files, err := os.ReadDir(dirPath) + if err != nil { + allErrs = append(allErrs, err) + continue + } + + for _, f := range files { + // Todo: Decide if there's anything more formal that we want to + // ensure about non-directories scoped to user namespaces + if !f.IsDir() { + continue + } + + segment := f.Name() + filePath := path.Join(dirPath, segment) + + if !slices.Contains(supportedUserNameSpaceDirectories, segment) { + allErrs = append(allErrs, fmt.Errorf("%q: only these sub-directories are allowed at top of user namespace: [%s]", filePath, strings.Join(supportedUserNameSpaceDirectories, ", "))) + continue + } + + if slices.Contains(supportedResourceTypes, segment) { + errs := validateCoderResourceSubdirectory(filePath) + if len(errs) != 0 { + allErrs = append(allErrs, errs...) + } + } + } + } + + return allErrs +} + +func validateRepoStructure() error { + var problems []error + if errs := validateRegistryDirectory(); len(errs) != 0 { + problems = append(problems, errs...) + } + + _, err := os.Stat("./.icons") + if err != nil { + problems = append(problems, errors.New("missing top-level .icons directory (used for storing reusable Coder resource icons)")) + } + + if len(problems) != 0 { + return validationPhaseError{ + phase: validationPhaseFileStructureValidation, + errors: problems, + } + } + return nil +} diff --git a/registry/hashicorp/README.md b/registry/hashicorp/README.md deleted file mode 100644 index 59bbe8a..0000000 --- a/registry/hashicorp/README.md +++ /dev/null @@ -1,8 +0,0 @@ ---- -display_name: HashiCorp -bio: HashiCorp, an IBM company, empowers organizations to automate and secure multi-cloud and hybrid environments with The Infrastructure Cloud™. Our suite of Infrastructure Lifecycle Management and Security Lifecycle Management solutions are built on projects with source code freely available at their core. The HashiCorp suite underpins the world's most critical applications, helping enterprises achieve efficiency, security, and scalability at any stage of their cloud journey. -github: hashicorp -linkedin: https://www.linkedin.com/company/hashicorp -website: https://www.hashicorp.com/ -status: partner ---- diff --git a/registry/jfrog/README.md b/registry/jfrog/README.md deleted file mode 100644 index 8dea670..0000000 --- a/registry/jfrog/README.md +++ /dev/null @@ -1,8 +0,0 @@ ---- -display_name: Jfrog -bio: At JFrog, we are making endless software versions a thing of the past, with liquid software that flows continuously and automatically from build all the way through to production. -github: jfrog -linkedin: https://www.linkedin.com/company/jfrog-ltd -website: https://jfrog.com/ -status: partner ---- diff --git a/registry/nataindata/README.md b/registry/nataindata/README.md index ddc5095..5f29181 100644 --- a/registry/nataindata/README.md +++ b/registry/nataindata/README.md @@ -3,5 +3,5 @@ display_name: Nataindata bio: Data engineer github: nataindata website: https://www.nataindata.com -status: community +status: partner --- diff --git a/registry/nataindata/apache-airflow/README.md b/registry/nataindata/modules/apache-airflow/README.md similarity index 100% rename from registry/nataindata/apache-airflow/README.md rename to registry/nataindata/modules/apache-airflow/README.md diff --git a/registry/nataindata/apache-airflow/main.tf b/registry/nataindata/modules/apache-airflow/main.tf similarity index 100% rename from registry/nataindata/apache-airflow/main.tf rename to registry/nataindata/modules/apache-airflow/main.tf diff --git a/registry/nataindata/apache-airflow/run.sh b/registry/nataindata/modules/apache-airflow/run.sh similarity index 100% rename from registry/nataindata/apache-airflow/run.sh rename to registry/nataindata/modules/apache-airflow/run.sh diff --git a/scripts/contributors/contributors.go b/scripts/contributors/contributors.go deleted file mode 100644 index 02823f2..0000000 --- a/scripts/contributors/contributors.go +++ /dev/null @@ -1,446 +0,0 @@ -package main - -import ( - "bufio" - "errors" - "fmt" - "net/url" - "os" - "path" - "slices" - "strings" - - "gopkg.in/yaml.v3" -) - -const rootRegistryPath = "./registry" - -var ( - validContributorStatuses = []string{"official", "partner", "community"} - supportedAvatarFileFormats = []string{".png", ".jpeg", ".jpg", ".gif", ".svg"} -) - -type readme struct { - filePath string - rawText string -} - -type contributorProfileFrontmatter struct { - DisplayName string `yaml:"display_name"` - Bio string `yaml:"bio"` - GithubUsername string `yaml:"github"` - // Script assumes that if value is nil, the Registry site build step will - // backfill the value with the user's GitHub avatar URL - AvatarURL *string `yaml:"avatar"` - LinkedinURL *string `yaml:"linkedin"` - WebsiteURL *string `yaml:"website"` - SupportEmail *string `yaml:"support_email"` - EmployerGithubUsername *string `yaml:"employer_github"` - ContributorStatus *string `yaml:"status"` -} - -type contributorProfile struct { - frontmatter contributorProfileFrontmatter - filePath string -} - -var _ error = validationPhaseError{} - -type validationPhaseError struct { - phase string - errors []error -} - -func (vpe validationPhaseError) Error() string { - validationStrs := []string{} - for _, e := range vpe.errors { - validationStrs = append(validationStrs, fmt.Sprintf("- %v", e)) - } - slices.Sort(validationStrs) - - msg := fmt.Sprintf("Error during %q phase of README validation:", vpe.phase) - msg += strings.Join(validationStrs, "\n") - msg += "\n" - - return msg -} - -func extractFrontmatter(readmeText string) (string, error) { - if readmeText == "" { - return "", errors.New("README is empty") - } - - const fence = "---" - fm := "" - fenceCount := 0 - lineScanner := bufio.NewScanner( - strings.NewReader(strings.TrimSpace(readmeText)), - ) - for lineScanner.Scan() { - nextLine := lineScanner.Text() - if fenceCount == 0 && nextLine != fence { - return "", errors.New("README does not start with frontmatter fence") - } - - if nextLine != fence { - fm += nextLine + "\n" - continue - } - - fenceCount++ - if fenceCount >= 2 { - break - } - } - - if fenceCount == 1 { - return "", errors.New("README does not have two sets of frontmatter fences") - } - return fm, nil -} - -func validateContributorGithubUsername(githubUsername string) error { - if githubUsername == "" { - return errors.New("missing GitHub username") - } - - lower := strings.ToLower(githubUsername) - if uriSafe := url.PathEscape(lower); uriSafe != lower { - return fmt.Errorf("gitHub username %q is not a valid URL path segment", githubUsername) - } - - return nil -} - -func validateContributorEmployerGithubUsername( - employerGithubUsername *string, - githubUsername string, -) []error { - if employerGithubUsername == nil { - return nil - } - - problems := []error{} - if *employerGithubUsername == "" { - problems = append(problems, errors.New("company_github field is defined but has empty value")) - return problems - } - - lower := strings.ToLower(*employerGithubUsername) - if uriSafe := url.PathEscape(lower); uriSafe != lower { - problems = append(problems, fmt.Errorf("gitHub company username %q is not a valid URL path segment", *employerGithubUsername)) - } - - if *employerGithubUsername == githubUsername { - problems = append(problems, fmt.Errorf("cannot list own GitHub name (%q) as employer", githubUsername)) - } - - return problems -} - -func validateContributorDisplayName(displayName string) error { - if displayName == "" { - return fmt.Errorf("missing display_name") - } - - return nil -} - -func validateContributorLinkedinURL(linkedinURL *string) error { - if linkedinURL == nil { - return nil - } - - if _, err := url.ParseRequestURI(*linkedinURL); err != nil { - return fmt.Errorf("linkedIn URL %q is not valid: %v", *linkedinURL, err) - } - - return nil -} - -func validateContributorSupportEmail(email *string) []error { - if email == nil { - return nil - } - - problems := []error{} - - // Can't 100% validate that this is correct without actually sending - // an email, and especially with some contributors being individual - // developers, we don't want to do that on every single run of the CI - // pipeline. Best we can do is verify the general structure - username, server, ok := strings.Cut(*email, "@") - if !ok { - problems = append(problems, fmt.Errorf("email address %q is missing @ symbol", *email)) - return problems - } - - if username == "" { - problems = append(problems, fmt.Errorf("email address %q is missing username", *email)) - } - - domain, tld, ok := strings.Cut(server, ".") - if !ok { - problems = append(problems, fmt.Errorf("email address %q is missing period for server segment", *email)) - return problems - } - - if domain == "" { - problems = append(problems, fmt.Errorf("email address %q is missing domain", *email)) - } - if tld == "" { - problems = append(problems, fmt.Errorf("email address %q is missing top-level domain", *email)) - } - if strings.Contains(*email, "?") { - problems = append(problems, errors.New("email is not allowed to contain query parameters")) - } - - return problems -} - -func validateContributorWebsite(websiteURL *string) error { - if websiteURL == nil { - return nil - } - - if _, err := url.ParseRequestURI(*websiteURL); err != nil { - return fmt.Errorf("linkedIn URL %q is not valid: %v", *websiteURL, err) - } - - return nil -} - -func validateContributorStatus(status *string) error { - if status == nil { - return nil - } - - if !slices.Contains(validContributorStatuses, *status) { - return fmt.Errorf("contributor status %q is not valid", *status) - } - - return nil -} - -// Can't validate the image actually leads to a valid resource in a pure -// function, but can at least catch obvious problems -func validateContributorAvatarURL(avatarURL *string) []error { - if avatarURL == nil { - return nil - } - - problems := []error{} - if *avatarURL == "" { - problems = append(problems, errors.New("avatar URL must be omitted or non-empty string")) - return problems - } - - // Have to use .Parse instead of .ParseRequestURI because this is the - // one field that's allowed to be a relative URL - if _, err := url.Parse(*avatarURL); err != nil { - problems = append(problems, fmt.Errorf("URL %q is not a valid relative or absolute URL", *avatarURL)) - } - if strings.Contains(*avatarURL, "?") { - problems = append(problems, errors.New("avatar URL is not allowed to contain search parameters")) - } - - matched := false - for _, ff := range supportedAvatarFileFormats { - matched = strings.HasSuffix(*avatarURL, ff) - if matched { - break - } - } - if !matched { - segments := strings.Split(*avatarURL, ".") - fileExtension := segments[len(segments)-1] - problems = append(problems, fmt.Errorf("avatar URL '.%s' does not end in a supported file format: [%s]", fileExtension, strings.Join(supportedAvatarFileFormats, ", "))) - } - - return problems -} - -func addFilePathToError(filePath string, err error) error { - return fmt.Errorf("%q: %v", filePath, err) -} - -func validateContributorYaml(yml contributorProfile) []error { - allProblems := []error{} - - if err := validateContributorGithubUsername(yml.frontmatter.GithubUsername); err != nil { - allProblems = append(allProblems, addFilePathToError(yml.filePath, err)) - } - if err := validateContributorDisplayName(yml.frontmatter.DisplayName); err != nil { - allProblems = append(allProblems, addFilePathToError(yml.filePath, err)) - } - if err := validateContributorLinkedinURL(yml.frontmatter.LinkedinURL); err != nil { - allProblems = append(allProblems, addFilePathToError(yml.filePath, err)) - } - if err := validateContributorWebsite(yml.frontmatter.WebsiteURL); err != nil { - allProblems = append(allProblems, addFilePathToError(yml.filePath, err)) - } - if err := validateContributorStatus(yml.frontmatter.ContributorStatus); err != nil { - allProblems = append(allProblems, addFilePathToError(yml.filePath, err)) - } - - for _, err := range validateContributorEmployerGithubUsername(yml.frontmatter.EmployerGithubUsername, yml.frontmatter.GithubUsername) { - allProblems = append(allProblems, addFilePathToError(yml.filePath, err)) - } - for _, err := range validateContributorSupportEmail(yml.frontmatter.SupportEmail) { - allProblems = append(allProblems, addFilePathToError(yml.filePath, err)) - } - for _, err := range validateContributorAvatarURL(yml.frontmatter.AvatarURL) { - allProblems = append(allProblems, addFilePathToError(yml.filePath, err)) - } - - return allProblems -} - -func parseContributorProfile(rm readme) (contributorProfile, error) { - fm, err := extractFrontmatter(rm.rawText) - if err != nil { - return contributorProfile{}, fmt.Errorf("%q: failed to parse frontmatter: %v", rm.filePath, err) - } - - yml := contributorProfileFrontmatter{} - if err := yaml.Unmarshal([]byte(fm), &yml); err != nil { - return contributorProfile{}, fmt.Errorf("%q: failed to parse: %v", rm.filePath, err) - } - - return contributorProfile{ - filePath: rm.filePath, - frontmatter: yml, - }, nil -} - -func parseContributorFiles(readmeEntries []readme) (map[string]contributorProfile, error) { - profilesByUsername := map[string]contributorProfile{} - yamlParsingErrors := []error{} - for _, rm := range readmeEntries { - p, err := parseContributorProfile(rm) - if err != nil { - yamlParsingErrors = append(yamlParsingErrors, err) - continue - } - - if prev, alreadyExists := profilesByUsername[p.frontmatter.GithubUsername]; alreadyExists { - yamlParsingErrors = append(yamlParsingErrors, fmt.Errorf("%q: GitHub name %s conflicts with field defined in %q", p.filePath, p.frontmatter.GithubUsername, prev.filePath)) - continue - } - profilesByUsername[p.frontmatter.GithubUsername] = p - } - if len(yamlParsingErrors) != 0 { - return nil, validationPhaseError{ - phase: "YAML parsing", - errors: yamlParsingErrors, - } - } - - employeeGithubGroups := map[string][]string{} - yamlValidationErrors := []error{} - for _, p := range profilesByUsername { - errors := validateContributorYaml(p) - if len(errors) > 0 { - yamlValidationErrors = append(yamlValidationErrors, errors...) - continue - } - - if p.frontmatter.EmployerGithubUsername != nil { - employeeGithubGroups[*p.frontmatter.EmployerGithubUsername] = append( - employeeGithubGroups[*p.frontmatter.EmployerGithubUsername], - p.frontmatter.GithubUsername, - ) - } - } - for companyName, group := range employeeGithubGroups { - if _, found := profilesByUsername[companyName]; found { - continue - } - yamlValidationErrors = append(yamlValidationErrors, fmt.Errorf("company %q does not exist in %q directory but is referenced by these profiles: [%s]", companyName, rootRegistryPath, strings.Join(group, ", "))) - } - if len(yamlValidationErrors) != 0 { - return nil, validationPhaseError{ - phase: "Raw YAML Validation", - errors: yamlValidationErrors, - } - } - - return profilesByUsername, nil -} - -func aggregateContributorReadmeFiles() ([]readme, error) { - dirEntries, err := os.ReadDir(rootRegistryPath) - if err != nil { - return nil, err - } - - allReadmeFiles := []readme{} - problems := []error{} - for _, e := range dirEntries { - dirPath := path.Join(rootRegistryPath, e.Name()) - if !e.IsDir() { - problems = append(problems, fmt.Errorf("detected non-directory file %q at base of main Registry directory", dirPath)) - continue - } - - readmePath := path.Join(dirPath, "README.md") - rmBytes, err := os.ReadFile(readmePath) - if err != nil { - problems = append(problems, err) - continue - } - allReadmeFiles = append(allReadmeFiles, readme{ - filePath: readmePath, - rawText: string(rmBytes), - }) - } - - if len(problems) != 0 { - return nil, validationPhaseError{ - phase: "FileSystem reading", - errors: problems, - } - } - - return allReadmeFiles, nil -} - -func validateRelativeUrls( - contributors map[string]contributorProfile, -) error { - // This function only validates relative avatar URLs for now, but it can be - // beefed up to validate more in the future - problems := []error{} - - for _, con := range contributors { - // If the avatar URL is missing, we'll just assume that the Registry - // site build step will take care of filling in the data properly - if con.frontmatter.AvatarURL == nil { - continue - } - if isRelativeURL := strings.HasPrefix(*con.frontmatter.AvatarURL, ".") || - strings.HasPrefix(*con.frontmatter.AvatarURL, "/"); !isRelativeURL { - continue - } - - if strings.HasPrefix(*con.frontmatter.AvatarURL, "..") { - problems = append(problems, fmt.Errorf("%q: relative avatar URLs cannot be placed outside a user's namespaced directory", con.filePath)) - continue - } - - absolutePath := strings.TrimSuffix(con.filePath, "README.md") + - *con.frontmatter.AvatarURL - _, err := os.ReadFile(absolutePath) - if err != nil { - problems = append(problems, fmt.Errorf("%q: relative avatar path %q does not point to image in file system", con.filePath, *con.frontmatter.AvatarURL)) - } - } - - if len(problems) == 0 { - return nil - } - return validationPhaseError{ - phase: "Relative URL validation", - errors: problems, - } -} diff --git a/scripts/contributors/main.go b/scripts/contributors/main.go deleted file mode 100644 index 9091318..0000000 --- a/scripts/contributors/main.go +++ /dev/null @@ -1,39 +0,0 @@ -// This package is for validating all contributors within the main Registry -// directory. It validates that it has nothing but sub-directories, and that -// each sub-directory has a README.md file. Each of those files must then -// describe a specific contributor. The contents of these files will be parsed -// by the Registry site build step, to be displayed in the Registry site's UI. -package main - -import ( - "log" -) - -func main() { - log.Println("Starting README validation") - allReadmeFiles, err := aggregateContributorReadmeFiles() - if err != nil { - log.Panic(err) - } - - log.Printf("Processing %d README files\n", len(allReadmeFiles)) - contributors, err := parseContributorFiles(allReadmeFiles) - log.Printf( - "Processed %d README files as valid contributor profiles", - len(contributors), - ) - if err != nil { - log.Panic(err) - } - - err = validateRelativeUrls(contributors) - if err != nil { - log.Panic(err) - } - log.Println("All relative URLs for READMEs are valid") - - log.Printf( - "Processed all READMEs in the %q directory\n", - rootRegistryPath, - ) -}