Compare commits

...

12 Commits

Author SHA1 Message Date
b3ebe386dc Add mkdocs-material dependency to requirements.txt 2026-02-03 19:18:44 +01:00
15dcf09ec1 Added ability to specify the Organization object or organization name for harvest_readmes function.
All checks were successful
/ unit-tests (push) Successful in 52s
2025-11-20 09:29:20 +01:00
ca16f1b098 Added minimal mkdocs config.
All checks were successful
/ unit-tests (push) Successful in 36s
2025-11-15 11:46:07 +01:00
4276939f30 Converted "sk" to package. 2025-11-15 11:42:35 +01:00
15349c9390 Allow specifying Python version for virtual environment creation 2025-11-15 11:42:13 +01:00
064364caa6 Added minimal MkDocs configuration. 2025-11-15 11:42:07 +01:00
b83f53d140 Minimal MkDocs configuration to display harvested docs. 2025-11-14 08:57:16 +01:00
32ee557f93 Fixed incorrect import.
All checks were successful
/ unit-tests (push) Successful in 44s
2025-11-12 09:03:27 +01:00
565464e266 Added harvester package with harvester module and harvest_readmes() function prototype.
All checks were successful
/ unit-tests (push) Successful in 10s
2025-11-09 20:55:05 +01:00
c3c5f9935c Added package build and develop instructions. 2025-11-09 19:29:09 +01:00
c29e0b4e21 Added PyProject file. 2025-11-09 19:24:53 +01:00
c32fc25cee Excluded Python package build artifacts from Git repository. 2025-11-09 19:24:33 +01:00
13 changed files with 137 additions and 25 deletions

5
.gitignore vendored
View File

@@ -1,6 +1,8 @@
# Python # Python
.venv .venv
__pycache__/ __pycache__/
*.egg-info/
dist/
# Ignore sample JSON files # Ignore sample JSON files
*.sample.json *.sample.json
@@ -19,3 +21,6 @@ prototype_*.py
*.pem *.pem
*.key *.key
*.crt *.crt
# Harvester output
reference

21
BUILD.md Normal file
View File

@@ -0,0 +1,21 @@
# Build Instructions
Run the following command to build the project:
```shell
python -m build
```
This will create distribution files in the `dist/` directory.
Install the built package using pip:
```shell
pip install --no-index dist/devops-0.1.0-py3-none-any.whl
```
Install in editable mode for development:
```shell
pip install -e .
```

9
DEVOPS.md Normal file
View File

@@ -0,0 +1,9 @@
# DevOps Notes
## DevOps OAuth2 Flow
Type: **oauth2**
Flow: **accessCode**
Authorization URL: `https://app.vssps.visualstudio.com/oauth2/authorize?response_type=Assertion`
Token URL: `https://app.vssps.visualstudio.com/oauth2/token?client_assertion_type=urn:ietf:params:oauth:client-assertion-type:jwt-bearer&grant_type=urn:ietf:params:oauth:grant-type:jwt-bearer`
Scopes: `vso.code`

View File

@@ -3,11 +3,3 @@
[![Unit Tests](https://gitea.koszewscy.waw.pl/slawek/docs-harvester/actions/workflows/unit-tests.yml/badge.svg)](https://gitea.koszewscy.waw.pl/slawek/docs-harvester/actions?workflow=unit-tests.yml) [![Unit Tests](https://gitea.koszewscy.waw.pl/slawek/docs-harvester/actions/workflows/unit-tests.yml/badge.svg)](https://gitea.koszewscy.waw.pl/slawek/docs-harvester/actions?workflow=unit-tests.yml)
This project is designed to harvest and process Markdown documentation files from Git repositories. This project is designed to harvest and process Markdown documentation files from Git repositories.
## DevOps OAuth2 Flow
Type: **oauth2**
Flow: **accessCode**
Authorization URL: `https://app.vssps.visualstudio.com/oauth2/authorize&response_type=Assertion`
Token URL: `https://app.vssps.visualstudio.com/oauth2/token?client_assertion_type=urn:ietf:params:oauth:client-assertion-type:jwt-bearer&grant_type=urn:ietf:params:oauth:grant-type:jwt-bearer`
Scopes: `vso.code`

View File

@@ -15,7 +15,7 @@ Usage:
curl -sH "Authorization: Bearer $ADO_TOKEN" "https://dev.azure.com/$ADO_ORGANIZATION_URL/_apis/projects?api-version=7.1" curl -sH "Authorization: Bearer $ADO_TOKEN" "https://dev.azure.com/$ADO_ORGANIZATION_URL/_apis/projects?api-version=7.1"
""" """
from sk.azure import get_token from devops.azure import get_token
from argparse import ArgumentParser from argparse import ArgumentParser
import os import os

View File

@@ -1,18 +1,8 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import requests from harvester.harvester import harvest_readmes
from devops.devops import Organization, Project, Repository, Item
from devops.azure import get_token
org = Organization("https://dev.azure.com/mcovsandbox", token=get_token()) if __name__ == "__main__":
harvest_readmes(
# Find all Markdown files in all projects and repositories organization="mcovsandbox"
for project in org.projects: )
for repo in project.repositories:
try:
root_item = Item(repository=repo, path="/")
md_files = root_item.get_child_items(pattern="*.md", recurse=True)
for md_file in md_files:
print(f"Project: {project.name}, Repo: {repo.name}, File: {md_file.path}")
except requests.exceptions.HTTPError as e:
print(f"Repository {repo.name} is empty.")

1
harvester/__init__.py Normal file
View File

@@ -0,0 +1 @@
# Harvester Package

75
harvester/harvester.py Normal file
View File

@@ -0,0 +1,75 @@
import os
import requests
from devops.azure import get_token
from devops.devops import Organization, Project, Repository, Item
import logging
# Module-level logger: emits INFO and above through its own stream handler,
# formatted as "<timestamp> - <level> - <message>".
log = logging.getLogger(__name__)
log.setLevel(logging.INFO)

fmt = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
ch = logging.StreamHandler()  # no stream given, so logging's default (stderr) is used
ch.setFormatter(fmt)
log.addHandler(ch)
def sanitize_name(name: str) -> str:
    """Return *name* lower-cased, with spaces and underscores turned into hyphens.

    The result is used for file and directory names in the harvested output.
    """
    # Map both separator characters to "-" in a single pass.
    separators_to_hyphen = str.maketrans({" ": "-", "_": "-"})
    return name.lower().translate(separators_to_hyphen)
def _save_first_readme(repo, branches, project_path: str) -> bool:
    """Save the first non-empty README.md found on *branches* for *repo*.

    Branches are probed in order. The content is written to
    ``<project_path>/<sanitized repo name>.md``.

    Returns:
        True if a README was written, False if no branch yielded one.
    """
    target = os.path.join(project_path, f"{sanitize_name(repo.name)}.md")
    for branch_name in branches:
        try:
            readme = Item(repository=repo, path="/README.md", branch=branch_name)
            if not readme:
                continue
            readme_content = readme.get_content(branch=branch_name)
        except requests.exceptions.HTTPError:
            # Treated as "README.md not available on this branch"; try the next one.
            continue
        # An empty or whitespace-only README counts as missing.
        if readme_content is None or not readme_content.strip():
            continue
        # Explicit encoding: the content is decoded as UTF-8, so it must be
        # written as UTF-8 regardless of the platform's default encoding.
        with open(target, "w", encoding="utf-8") as f:
            f.write(readme_content.decode("utf-8"))
        return True
    return False


def harvest_readmes(
    organization: str | Organization,
    branch: list[str | None] | None = None,
    projects: list[str] | None = None,
    output_path: str = "reference",
) -> None:
    """Harvest README.md files from repositories into a Markdown tree.

    For every targeted project, each repository is probed for ``/README.md``
    on the candidate branches in order; the first non-empty README is saved
    as ``<output_path>/<project>/<repository>.md``. A per-project ``index.md``
    listing every repository, and a top-level ``index.md`` listing every
    project, are written as well.

    Args:
        organization: An ``Organization`` instance, or an organization name
            that is appended to ``https://dev.azure.com/`` and opened with a
            token from ``get_token()``.
        branch: Branch names to try, in order. Defaults to
            ``["main", "dev", None]``. ``None`` entries are passed to ``Item``
            unchanged (presumably selecting the repository's default
            branch - TODO confirm).
        projects: Names of the projects to harvest. Empty or ``None`` targets
            every project of the organization.
        output_path: Root directory of the generated files.
    """
    # Resolve the default here rather than in the signature so that no
    # mutable object is shared between calls.
    branches = ["main", "dev", None] if branch is None else branch

    if isinstance(organization, str):
        org = Organization("https://dev.azure.com/" + organization, token=get_token())
    else:
        org = organization

    if projects:
        # Target specific projects
        target_projects = [Project(org=org, name=project_name) for project_name in projects]
    else:
        # Target all projects
        target_projects = org.projects

    # The top-level index is written even when nothing was harvested, so the
    # output root has to exist up front.
    os.makedirs(output_path, exist_ok=True)

    for project in target_projects:
        # Create the project directory unconditionally: its index.md is
        # written below even if no repository provided a README.
        project_path = os.path.join(output_path, sanitize_name(project.name))  # type: ignore
        os.makedirs(project_path, exist_ok=True)

        repositories = project.repositories
        repo_index = []  # (repository name, README saved?) for the project index
        log.info(f"Processing project: {project.name} with {len(repositories)} repositories.")  # type: ignore

        for repo in repositories:
            log.info(f"...processing repository: {repo.name}")
            readme_found = _save_first_readme(repo, branches, project_path)
            # Every repository is indexed, with or without a README.
            repo_index.append((repo.name, readme_found))
            if not readme_found:
                log.warning(f"......README.md in repo {repo.name} is not found or empty.")

        # Save the repository index for the project
        with open(os.path.join(project_path, "index.md"), "w", encoding="utf-8") as index_file:
            index_file.write(f"# Repository Index for Project: {project.name}\n\n")  # type: ignore
            for repo_name, has_readme in repo_index:
                status = "" if has_readme else " - README.md not found"
                index_file.write(f"- [{repo_name}]({sanitize_name(repo_name)}.md){status}\n")

    # Save the reference index for all projects
    with open(os.path.join(output_path, "index.md"), "w", encoding="utf-8") as ref_index_file:
        ref_index_file.write("# Project Index\n\n")
        for project in target_projects:
            ref_index_file.write(f"- [{project.name}]({sanitize_name(project.name)}/index.md)\n")  # type: ignore

View File

@@ -1,6 +1,8 @@
#! /usr/bin/env bash #! /usr/bin/env bash
python3 -m venv .venv VERSION="${1:-3}"
python${VERSION} -m venv .venv
./.venv/bin/pip install --upgrade pip ./.venv/bin/pip install --upgrade pip
./.venv/bin/pip install -r requirements.txt ./.venv/bin/pip install -r requirements.txt

4
mkdocs.yml Normal file
View File

@@ -0,0 +1,4 @@
site_name: Reference Documentation
docs_dir: reference
theme:
name: material

11
pyproject.toml Normal file
View File

@@ -0,0 +1,11 @@
[build-system]
requires = ["setuptools", "wheel", "build"]
build-backend = "setuptools.build_meta"
[project]
name = "devops"
version = "0.1.0"
requires-python = ">=3.12"
[tool.setuptools]
packages = ["devops"]

View File

@@ -2,3 +2,4 @@ debugpy==1.8.17
azure-identity==1.25.1 azure-identity==1.25.1
requests==2.32.5 requests==2.32.5
loki-logger-handler==1.1.2 loki-logger-handler==1.1.2
mkdocs-material>=1.5.2

1
sk/__init__.py Normal file
View File

@@ -0,0 +1 @@
# My helper tools