Skip to content

fix: update repo structure validation logic to disallow false positives #10

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
May 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,6 @@ jobs:
with:
go-version: "1.23.2"
- name: Validate contributors
run: go build ./scripts/contributors && ./contributors
run: go build ./cmd/readmevalidation && ./readmevalidation
- name: Remove build file artifact
run: rm ./contributors
run: rm ./readmevalidation
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,8 @@ dist
.yarn/install-state.gz
.pnp.*

# Script output
/contributors
# Things needed for CI
/readmevalidation

# Terraform files generated during testing
.terraform*
Expand Down
340 changes: 340 additions & 0 deletions cmd/readmevalidation/contributors.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,340 @@
package main

import (
"errors"
"fmt"
"log"
"net/url"
"os"
"path"
"slices"
"strings"

"gopkg.in/yaml.v3"
)

var validContributorStatuses = []string{"official", "partner", "community"}

type contributorProfileFrontmatter struct {
DisplayName string `yaml:"display_name"`
Bio string `yaml:"bio"`
// Script assumes that if value is nil, the Registry site build step will
// backfill the value with the user's GitHub avatar URL
AvatarURL *string `yaml:"avatar"`
LinkedinURL *string `yaml:"linkedin"`
WebsiteURL *string `yaml:"website"`
SupportEmail *string `yaml:"support_email"`
ContributorStatus *string `yaml:"status"`
}

type contributorProfile struct {
frontmatter contributorProfileFrontmatter
namespace string
filePath string
}

func validateContributorDisplayName(displayName string) error {
if displayName == "" {
return fmt.Errorf("missing display_name")
}

return nil
}

func validateContributorLinkedinURL(linkedinURL *string) error {
if linkedinURL == nil {
return nil
}

if _, err := url.ParseRequestURI(*linkedinURL); err != nil {
return fmt.Errorf("linkedIn URL %q is not valid: %v", *linkedinURL, err)
}

return nil
}

func validateContributorSupportEmail(email *string) []error {
if email == nil {
return nil
}

errs := []error{}

// Can't 100% validate that this is correct without actually sending
// an email, and especially with some contributors being individual
// developers, we don't want to do that on every single run of the CI
// pipeline. Best we can do is verify the general structure
username, server, ok := strings.Cut(*email, "@")
if !ok {
errs = append(errs, fmt.Errorf("email address %q is missing @ symbol", *email))
return errs
}

if username == "" {
errs = append(errs, fmt.Errorf("email address %q is missing username", *email))
}

domain, tld, ok := strings.Cut(server, ".")
if !ok {
errs = append(errs, fmt.Errorf("email address %q is missing period for server segment", *email))
return errs
}

if domain == "" {
errs = append(errs, fmt.Errorf("email address %q is missing domain", *email))
}
if tld == "" {
errs = append(errs, fmt.Errorf("email address %q is missing top-level domain", *email))
}
if strings.Contains(*email, "?") {
errs = append(errs, errors.New("email is not allowed to contain query parameters"))
}

return errs
}

func validateContributorWebsite(websiteURL *string) error {
if websiteURL == nil {
return nil
}

if _, err := url.ParseRequestURI(*websiteURL); err != nil {
return fmt.Errorf("linkedIn URL %q is not valid: %v", *websiteURL, err)
}

return nil
}

func validateContributorStatus(status *string) error {
if status == nil {
return nil
}

if !slices.Contains(validContributorStatuses, *status) {
return fmt.Errorf("contributor status %q is not valid", *status)
}

return nil
}

// Can't validate the image actually leads to a valid resource in a pure
// function, but can at least catch obvious problems
func validateContributorAvatarURL(avatarURL *string) []error {
if avatarURL == nil {
return nil
}

errs := []error{}
if *avatarURL == "" {
errs = append(errs, errors.New("avatar URL must be omitted or non-empty string"))
return errs
}

// Have to use .Parse instead of .ParseRequestURI because this is the
// one field that's allowed to be a relative URL
if _, err := url.Parse(*avatarURL); err != nil {
errs = append(errs, fmt.Errorf("URL %q is not a valid relative or absolute URL", *avatarURL))
}
if strings.Contains(*avatarURL, "?") {
errs = append(errs, errors.New("avatar URL is not allowed to contain search parameters"))
}

matched := false
for _, ff := range supportedAvatarFileFormats {
matched = strings.HasSuffix(*avatarURL, ff)
if matched {
break
}
}
if !matched {
segments := strings.Split(*avatarURL, ".")
fileExtension := segments[len(segments)-1]
errs = append(errs, fmt.Errorf("avatar URL '.%s' does not end in a supported file format: [%s]", fileExtension, strings.Join(supportedAvatarFileFormats, ", ")))
}

return errs
}

func validateContributorYaml(yml contributorProfile) []error {
allErrs := []error{}

if err := validateContributorDisplayName(yml.frontmatter.DisplayName); err != nil {
allErrs = append(allErrs, addFilePathToError(yml.filePath, err))
}
if err := validateContributorLinkedinURL(yml.frontmatter.LinkedinURL); err != nil {
allErrs = append(allErrs, addFilePathToError(yml.filePath, err))
}
if err := validateContributorWebsite(yml.frontmatter.WebsiteURL); err != nil {
allErrs = append(allErrs, addFilePathToError(yml.filePath, err))
}
if err := validateContributorStatus(yml.frontmatter.ContributorStatus); err != nil {
allErrs = append(allErrs, addFilePathToError(yml.filePath, err))
}

for _, err := range validateContributorSupportEmail(yml.frontmatter.SupportEmail) {
allErrs = append(allErrs, addFilePathToError(yml.filePath, err))
}
for _, err := range validateContributorAvatarURL(yml.frontmatter.AvatarURL) {
allErrs = append(allErrs, addFilePathToError(yml.filePath, err))
}

return allErrs
}

func parseContributorProfile(rm readme) (contributorProfile, error) {
fm, _, err := separateFrontmatter(rm.rawText)
if err != nil {
return contributorProfile{}, fmt.Errorf("%q: failed to parse frontmatter: %v", rm.filePath, err)
}

yml := contributorProfileFrontmatter{}
if err := yaml.Unmarshal([]byte(fm), &yml); err != nil {
return contributorProfile{}, fmt.Errorf("%q: failed to parse: %v", rm.filePath, err)
}

return contributorProfile{
filePath: rm.filePath,
frontmatter: yml,
namespace: strings.TrimSuffix(strings.TrimPrefix(rm.filePath, "registry/"), "/README.md"),
}, nil
}

func parseContributorFiles(readmeEntries []readme) (map[string]contributorProfile, error) {
profilesByNamespace := map[string]contributorProfile{}
yamlParsingErrors := []error{}
for _, rm := range readmeEntries {
p, err := parseContributorProfile(rm)
if err != nil {
yamlParsingErrors = append(yamlParsingErrors, err)
continue
}

if prev, alreadyExists := profilesByNamespace[p.namespace]; alreadyExists {
yamlParsingErrors = append(yamlParsingErrors, fmt.Errorf("%q: namespace %q conflicts with namespace from %q", p.filePath, p.namespace, prev.filePath))
continue
}
profilesByNamespace[p.namespace] = p
}
if len(yamlParsingErrors) != 0 {
return nil, validationPhaseError{
phase: validationPhaseReadmeParsing,
errors: yamlParsingErrors,
}
}

yamlValidationErrors := []error{}
for _, p := range profilesByNamespace {
errors := validateContributorYaml(p)
if len(errors) > 0 {
yamlValidationErrors = append(yamlValidationErrors, errors...)
continue
}
}
if len(yamlValidationErrors) != 0 {
return nil, validationPhaseError{
phase: validationPhaseReadmeParsing,
errors: yamlValidationErrors,
}
}

return profilesByNamespace, nil
}

func aggregateContributorReadmeFiles() ([]readme, error) {
dirEntries, err := os.ReadDir(rootRegistryPath)
if err != nil {
return nil, err
}

allReadmeFiles := []readme{}
errs := []error{}
for _, e := range dirEntries {
dirPath := path.Join(rootRegistryPath, e.Name())
if !e.IsDir() {
continue
}

readmePath := path.Join(dirPath, "README.md")
rmBytes, err := os.ReadFile(readmePath)
if err != nil {
errs = append(errs, err)
continue
}
allReadmeFiles = append(allReadmeFiles, readme{
filePath: readmePath,
rawText: string(rmBytes),
})
}

if len(errs) != 0 {
return nil, validationPhaseError{
phase: validationPhaseFileLoad,
errors: errs,
}
}

return allReadmeFiles, nil
}

func validateContributorRelativeUrls(contributors map[string]contributorProfile) error {
// This function only validates relative avatar URLs for now, but it can be
// beefed up to validate more in the future
errs := []error{}

for _, con := range contributors {
// If the avatar URL is missing, we'll just assume that the Registry
// site build step will take care of filling in the data properly
if con.frontmatter.AvatarURL == nil {
continue
}

isRelativeURL := strings.HasPrefix(*con.frontmatter.AvatarURL, ".") ||
strings.HasPrefix(*con.frontmatter.AvatarURL, "/")
if !isRelativeURL {
continue
}

if strings.HasPrefix(*con.frontmatter.AvatarURL, "..") {
errs = append(errs, fmt.Errorf("%q: relative avatar URLs cannot be placed outside a user's namespaced directory", con.filePath))
continue
}

absolutePath := strings.TrimSuffix(con.filePath, "README.md") +
*con.frontmatter.AvatarURL
_, err := os.ReadFile(absolutePath)
if err != nil {
errs = append(errs, fmt.Errorf("%q: relative avatar path %q does not point to image in file system", con.filePath, *con.frontmatter.AvatarURL))
}
}

if len(errs) == 0 {
return nil
}
return validationPhaseError{
phase: validationPhaseAssetCrossReference,
errors: errs,
}
}

func validateAllContributorFiles() error {
allReadmeFiles, err := aggregateContributorReadmeFiles()
if err != nil {
return err
}

log.Printf("Processing %d README files\n", len(allReadmeFiles))
contributors, err := parseContributorFiles(allReadmeFiles)
if err != nil {
return err
}
log.Printf("Processed %d README files as valid contributor profiles", len(contributors))

err = validateContributorRelativeUrls(contributors)
if err != nil {
return err
}
log.Println("All relative URLs for READMEs are valid")

log.Printf("Processed all READMEs in the %q directory\n", rootRegistryPath)
return nil
}
28 changes: 28 additions & 0 deletions cmd/readmevalidation/errors.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package main

import "fmt"

// validationPhaseError represents an error that occurred during a specific
// phase of README validation. It should be used to collect ALL validation
// errors that happened during a specific phase, rather than the first one
// encountered.
type validationPhaseError struct {
phase validationPhase
errors []error
}

var _ error = validationPhaseError{}

func (vpe validationPhaseError) Error() string {
msg := fmt.Sprintf("Error during %q phase of README validation:", vpe.phase.String())
for _, e := range vpe.errors {
msg += fmt.Sprintf("\n- %v", e)
}
msg += "\n"

return msg
}

func addFilePathToError(filePath string, err error) error {
return fmt.Errorf("%q: %v", filePath, err)
}
Loading