Added harvester package with harvester module and harvest_readmes() function prototype.
All checks were successful
/ unit-tests (push) Successful in 10s
All checks were successful
/ unit-tests (push) Successful in 10s
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -21,3 +21,6 @@ prototype_*.py
|
|||||||
*.pem
|
*.pem
|
||||||
*.key
|
*.key
|
||||||
*.crt
|
*.crt
|
||||||
|
|
||||||
|
# Harvester output
|
||||||
|
reference
|
||||||
20
harvester.py
20
harvester.py
@@ -1,18 +1,8 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import requests
|
from harvester.harvester import harvest_readmes
|
||||||
from devops.devops import Organization, Project, Repository, Item
|
|
||||||
from devops.azure import get_token
|
|
||||||
|
|
||||||
org = Organization("https://dev.azure.com/mcovsandbox", token=get_token())
|
if __name__ == "__main__":
|
||||||
|
harvest_readmes(
|
||||||
# Find all Markdown files in all projects and repositories
|
organization="mcovsandbox"
|
||||||
for project in org.projects:
|
)
|
||||||
for repo in project.repositories:
|
|
||||||
try:
|
|
||||||
root_item = Item(repository=repo, path="/")
|
|
||||||
md_files = root_item.get_child_items(pattern="*.md", recurse=True)
|
|
||||||
for md_file in md_files:
|
|
||||||
print(f"Project: {project.name}, Repo: {repo.name}, File: {md_file.path}")
|
|
||||||
except requests.exceptions.HTTPError as e:
|
|
||||||
print(f"Repository {repo.name} is empty.")
|
|
||||||
|
|||||||
1
harvester/__init__.py
Normal file
1
harvester/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
# Harvester Package
|
||||||
72
harvester/harvester.py
Normal file
72
harvester/harvester.py
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
import os
|
||||||
|
import requests
|
||||||
|
from devops.azure import get_token
|
||||||
|
from devops.devops import Organization, Project, Repository, Item
|
||||||
|
import logging
|
||||||
|
|
||||||
|
# Module-level logger for the harvester: INFO and above, emitted through a
# dedicated stream handler with a timestamped "time - LEVEL - message" layout.
log = logging.getLogger(__name__)
log.setLevel(logging.INFO)

# The handler and formatter stay bound to module-level names, as before.
ch = logging.StreamHandler()
fmt = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
ch.setFormatter(fmt)
log.addHandler(ch)
|
||||||
|
|
||||||
|
def sanitize_name(name: str) -> str:
    """Return *name* lower-cased, with spaces and underscores turned into hyphens."""
    # One translation pass maps both separator characters to "-".
    separators_to_hyphen = str.maketrans({" ": "-", "_": "-"})
    lowered = name.lower()
    return lowered.translate(separators_to_hyphen)
|
||||||
|
|
||||||
|
def _find_readme_content(repo: Repository, branches: list[str | None]) -> bytes | None:
    """Return the first non-empty /README.md content found on *branches*, else None."""
    for branch_name in branches:
        try:
            readme = Item(repository=repo, path="/README.md", branch=branch_name)
            if not readme:
                continue
            readme_content = readme.get_content(branch=branch_name)
        except requests.exceptions.HTTPError:
            # The lookup failed for this branch (e.g. no README.md there); try the next one.
            continue
        # An empty or whitespace-only README counts as "not found" for this branch.
        if readme_content is not None and readme_content.strip():
            return readme_content
    return None


def harvest_readmes(organization: str, branch: list[str | None] | None = None, projects: list[str] | None = None, output_path: str = "reference") -> None:
    """Harvest README.md files from the repositories of an organization.

    For every targeted project, each repository is probed for ``/README.md`` on
    the candidate branches in order. The first non-empty README is written to
    ``<output_path>/<project>/<repo>.md``, with both names passed through
    ``sanitize_name``. Each project also gets an ``index.md`` listing all of its
    repositories, and ``<output_path>/index.md`` lists all targeted projects.

    Args:
        organization: Organization name, appended to ``https://dev.azure.com/``.
        branch: Branch names to try, in order. Defaults to
            ``["main", "dev", None]``. A ``None`` entry is passed to ``Item``
            unchanged (presumably meaning the repository's default branch;
            confirm against ``devops.devops.Item``).
        projects: Names of the projects to harvest. When empty or ``None``, all
            projects of the organization are harvested.
        output_path: Root directory that receives the generated Markdown files.
    """
    # Defaults are built per call rather than shared as mutable default arguments.
    branches = ["main", "dev", None] if branch is None else branch

    org = Organization("https://dev.azure.com/" + organization, token=get_token())

    if projects:
        # Target specific projects
        target_projects = [Project(org=org, name=project_name) for project_name in projects]
    else:
        # Target all projects; materialized because the collection is walked twice below.
        target_projects = list(org.projects)

    # Create the output root up front so the top-level index can always be written,
    # even when there is nothing to harvest.
    os.makedirs(output_path, exist_ok=True)

    for project in target_projects:
        # Create the project directory unconditionally: the project index is written
        # below even when none of its repositories yields a README.
        project_path = os.path.join(output_path, sanitize_name(project.name))  # type: ignore
        os.makedirs(project_path, exist_ok=True)

        repositories = project.repositories
        repo_index = []  # (repository name, README found?) for the project index.
        log.info("Processing project: %s with %s repositories.", project.name, len(repositories))  # type: ignore

        for repo in repositories:
            log.info("...processing repository: %s", repo.name)
            readme_content = _find_readme_content(repo, branches)
            readme_found = readme_content is not None

            if readme_content is not None:
                # Write as UTF-8 explicitly; the platform default encoding may not be
                # able to represent everything the decoded README contains.
                readme_file = os.path.join(project_path, f"{sanitize_name(repo.name)}.md")
                with open(readme_file, "w", encoding="utf-8") as f:
                    f.write(readme_content.decode("utf-8"))

            # Record the outcome for every repository, found or not.
            repo_index.append((repo.name, readme_found))
            if not readme_found:
                log.warning("......README.md in repo %s is not found or empty.", repo.name)

        # Save the repository index for the project
        with open(os.path.join(project_path, "index.md"), "w", encoding="utf-8") as index_file:
            index_file.write(f"# Repository Index for Project: {project.name}\n\n")  # type: ignore
            for repo_name, has_readme in repo_index:
                status = "" if has_readme else " - README.md not found"
                index_file.write(f"- [{repo_name}]({sanitize_name(repo_name)}.md){status}\n")

    # Save the reference index for all projects
    with open(os.path.join(output_path, "index.md"), "w", encoding="utf-8") as ref_index_file:
        ref_index_file.write("# Project Index\n\n")
        for project in target_projects:
            ref_index_file.write(f"- [{project.name}]({sanitize_name(project.name)}/index.md)\n")  # type: ignore
|
||||||
Reference in New Issue
Block a user