"""Harvest README.md files from Azure DevOps repositories into a local tree.

For every targeted project a directory is created under the output path that
holds one Markdown file per repository README plus an ``index.md`` listing all
repositories. A top-level ``index.md`` links to every project index.
"""

import logging
import os

import requests

from devops.azure import get_token
from devops.devops import Organization, Project, Repository, Item

# Module-level logging setup: timestamped records on a stream handler at INFO.
fmt = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
ch = logging.StreamHandler()
ch.setFormatter(fmt)
log = logging.getLogger(__name__)
log.addHandler(ch)
log.setLevel(logging.INFO)


def sanitize_name(name: str) -> str:
    """Return *name* lower-cased with spaces and underscores replaced by hyphens."""
    return name.lower().replace(" ", "-").replace("_", "-")


def _save_readme(repo: Repository, branches: list[str | None], project_dir: str) -> bool:
    """Write the first non-empty README.md found on *branches* for *repo*.

    Branches are tried in the given order. The content is written to
    ``<project_dir>/<sanitized repo name>.md``.

    Returns:
        True if a README was written, False if no branch yielded non-empty
        content.
    """
    target = os.path.join(project_dir, f"{sanitize_name(repo.name)}.md")
    for branch_name in branches:
        try:
            readme = Item(repository=repo, path="/README.md", branch=branch_name)
            if not readme:
                continue
            readme_content = readme.get_content(branch=branch_name)
        except requests.exceptions.HTTPError:
            # An HTTP error is treated as "no README.md on this branch".
            continue
        if readme_content is None or not readme_content.strip():
            # Empty README: keep looking on the remaining branches.
            continue
        # NOTE(review): get_content() is assumed to return UTF-8 bytes, as the
        # original code decoded it the same way - confirm against the API.
        # The explicit encoding keeps non-ASCII READMEs writable regardless of
        # the platform's default text encoding.
        with open(target, "w", encoding="utf-8") as f:
            f.write(readme_content.decode("utf-8"))
        return True
    return False


def _write_project_index(project_dir: str, project_name: str,
                         repo_index: list[tuple[str, bool]]) -> None:
    """Write ``index.md`` in *project_dir* listing each repository and its README status."""
    with open(os.path.join(project_dir, "index.md"), "w", encoding="utf-8") as index_file:
        index_file.write(f"# Repository Index for Project: {project_name}\n\n")
        for repo_name, has_readme in repo_index:
            status = "" if has_readme else " - README.md not found"
            index_file.write(f"- [{repo_name}]({sanitize_name(repo_name)}.md){status}\n")


def harvest_readmes(organization: str,
                    branch: list[str | None] | None = None,
                    projects: list[str] | None = None,
                    output_path: str = "reference") -> None:
    """Harvest README files from repositories.

    Args:
        organization: Organization name; appended to ``https://dev.azure.com/``.
        branch: Branch names to try, in order, for each repository. Defaults
            to ``["main", "dev", None]`` when omitted.
            NOTE(review): a ``None`` entry presumably selects the repository's
            default branch - confirm against ``Item``.
        projects: Names of the projects to process. When empty or omitted,
            every project of the organization is processed.
        output_path: Root directory for the generated Markdown files.

    Returns:
        None. Results are written to disk below *output_path*.
    """
    # None sentinels replace the former mutable list defaults.
    branches = ["main", "dev", None] if branch is None else branch

    org = Organization("https://dev.azure.com/" + organization, token=get_token())
    if projects:
        # Target specific projects
        target_projects = [Project(org=org, name=project_name) for project_name in projects]
    else:
        # Target all projects; materialized because the collection is walked twice.
        target_projects = list(org.projects)

    # The reference index is written even when there is nothing to harvest.
    os.makedirs(output_path, exist_ok=True)

    for project in target_projects:
        project_dir = os.path.join(output_path, sanitize_name(project.name))  # type: ignore
        # Create the directory up front: the project index is written even
        # when none of the repositories has a README.
        os.makedirs(project_dir, exist_ok=True)

        repositories = project.repositories
        log.info("Processing project: %s with %d repositories.",
                 project.name, len(repositories))  # type: ignore

        repo_index = []  # (repository name, README found) pairs for the project.
        for repo in repositories:
            log.info("...processing repository: %s", repo.name)
            readme_found = _save_readme(repo, branches, project_dir)
            repo_index.append((repo.name, readme_found))
            if not readme_found:
                log.warning("......README.md in repo %s is not found or empty.", repo.name)

        # Save the repository index for the project
        _write_project_index(project_dir, project.name, repo_index)  # type: ignore

    # Save the reference index for all projects
    with open(os.path.join(output_path, "index.md"), "w", encoding="utf-8") as ref_index_file:
        ref_index_file.write("# Project Index\n\n")
        for project in target_projects:
            ref_index_file.write(
                f"- [{project.name}]({sanitize_name(project.name)}/index.md)\n"  # type: ignore
            )