Compare commits
14 Commits
5412c3ea09
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| b3ebe386dc | |||
| 15dcf09ec1 | |||
| ca16f1b098 | |||
| 4276939f30 | |||
| 15349c9390 | |||
| 064364caa6 | |||
| b83f53d140 | |||
| 32ee557f93 | |||
| 565464e266 | |||
| c3c5f9935c | |||
| c29e0b4e21 | |||
| c32fc25cee | |||
| e6bca1ce47 | |||
| 2addc85e40 |
5
.gitignore
vendored
5
.gitignore
vendored
@@ -1,6 +1,8 @@
|
|||||||
# Python
|
# Python
|
||||||
.venv
|
.venv
|
||||||
__pycache__/
|
__pycache__/
|
||||||
|
*.egg-info/
|
||||||
|
dist/
|
||||||
|
|
||||||
# Ignore sample JSON files
|
# Ignore sample JSON files
|
||||||
*.sample.json
|
*.sample.json
|
||||||
@@ -19,3 +21,6 @@ prototype_*.py
|
|||||||
*.pem
|
*.pem
|
||||||
*.key
|
*.key
|
||||||
*.crt
|
*.crt
|
||||||
|
|
||||||
|
# Harvester output
|
||||||
|
reference
|
||||||
21
BUILD.md
Normal file
21
BUILD.md
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
# Build Instructions
|
||||||
|
|
||||||
|
Run the following command to build the project:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
python -m build
|
||||||
|
```
|
||||||
|
|
||||||
|
This will create distribution files in the `dist/` directory.
|
||||||
|
|
||||||
|
Install the built package using pip:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
pip install --no-index dist/devops-0.1.0-py3-none-any.whl
|
||||||
|
```
|
||||||
|
|
||||||
|
Install in editable mode for development:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
pip install -e .
|
||||||
|
```
|
||||||
9
DEVOPS.md
Normal file
9
DEVOPS.md
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
# DevOps Notes
|
||||||
|
|
||||||
|
## DevOps OAuth2 Flow
|
||||||
|
|
||||||
|
Type: **oauth2**
|
||||||
|
Flow: **accessCode**
|
||||||
|
Authorization URL: `https://app.vssps.visualstudio.com/oauth2/authorize?response_type=Assertion`
|
||||||
|
Token URL: `https://app.vssps.visualstudio.com/oauth2/token?client_assertion_type=urn:ietf:params:oauth:client-assertion-type:jwt-bearer&grant_type=urn:ietf:params:oauth:grant-type:jwt-bearer`
|
||||||
|
Scopes: `vso.code`
|
||||||
@@ -3,11 +3,3 @@
|
|||||||
[](https://gitea.koszewscy.waw.pl/slawek/docs-harvester/actions?workflow=unit-tests.yml)
|
[](https://gitea.koszewscy.waw.pl/slawek/docs-harvester/actions?workflow=unit-tests.yml)
|
||||||
|
|
||||||
This project is designed to harvest and process Markdown documentation files from Git repositories.
|
This project is designed to harvest and process Markdown documentation files from Git repositories.
|
||||||
|
|
||||||
## DevOps OAuth2 Flow
|
|
||||||
|
|
||||||
Type: **oauth2**
|
|
||||||
Flow: **accessCode**
|
|
||||||
Authorization URL: `https://app.vssps.visualstudio.com/oauth2/authorize?response_type=Assertion`
|
|
||||||
Token URL: `https://app.vssps.visualstudio.com/oauth2/token?client_assertion_type=urn:ietf:params:oauth:client-assertion-type:jwt-bearer&grant_type=urn:ietf:params:oauth:grant-type:jwt-bearer`
|
|
||||||
Scopes: `vso.code`
|
|
||||||
1
devops/__init__.py
Normal file
1
devops/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
# devops package
|
||||||
@@ -59,9 +59,9 @@ def get_token(
|
|||||||
|
|
||||||
def secret_credentials_auth(
|
def secret_credentials_auth(
|
||||||
scope: str = DEVOPS_SCOPE,
|
scope: str = DEVOPS_SCOPE,
|
||||||
tenant_id: str = os.environ.get("AZURE_TENANT_ID", ""),
|
tenant_id = os.environ.get("AZURE_TENANT_ID", ""),
|
||||||
client_id: str = os.environ.get("AZURE_CLIENT_ID", ""),
|
client_id = os.environ.get("AZURE_CLIENT_ID", ""),
|
||||||
client_secret: str = os.environ.get("AZURE_CLIENT_SECRET")
|
client_secret = os.environ.get("AZURE_CLIENT_SECRET")
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Authenticate using client credentials. Pass credentials via environment variables,
|
Authenticate using client credentials. Pass credentials via environment variables,
|
||||||
@@ -92,8 +92,8 @@ def certificate_credentials_auth(
|
|||||||
# Load the private key (RSA)
|
# Load the private key (RSA)
|
||||||
with open(pem_path, "rb") as f:
|
with open(pem_path, "rb") as f:
|
||||||
pem = f.read()
|
pem = f.read()
|
||||||
key_pem = re.search(b"-----BEGIN (?:RSA )?PRIVATE KEY-----.*?END (?:RSA )?PRIVATE KEY-----", pem, re.S).group(0)
|
key_pem = re.search(b"-----BEGIN (?:RSA )?PRIVATE KEY-----.*?END (?:RSA )?PRIVATE KEY-----", pem, re.S).group(0) # type: ignore
|
||||||
cert_pem = re.search(b"-----BEGIN CERTIFICATE-----.*?END CERTIFICATE-----", pem, re.S).group(0)
|
cert_pem = re.search(b"-----BEGIN CERTIFICATE-----.*?END CERTIFICATE-----", pem, re.S).group(0) # type: ignore
|
||||||
|
|
||||||
private_key = serialization.load_pem_private_key(key_pem, password=None)
|
private_key = serialization.load_pem_private_key(key_pem, password=None)
|
||||||
cert = x509.load_pem_x509_certificate(cert_pem)
|
cert = x509.load_pem_x509_certificate(cert_pem)
|
||||||
@@ -115,7 +115,7 @@ def certificate_credentials_auth(
|
|||||||
|
|
||||||
headers = {"x5t": x5t, "kid": x5t}
|
headers = {"x5t": x5t, "kid": x5t}
|
||||||
|
|
||||||
assertion = jwt.encode(claims, private_key, algorithm="RS256", headers=headers)
|
assertion = jwt.encode(claims, private_key, algorithm="RS256", headers=headers) # type: ignore
|
||||||
|
|
||||||
data = {
|
data = {
|
||||||
"grant_type": "client_credentials",
|
"grant_type": "client_credentials",
|
||||||
@@ -4,15 +4,11 @@ import requests
|
|||||||
import urllib.parse
|
import urllib.parse
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
import logging
|
import logging
|
||||||
from sk.logger import log_entity_creation
|
|
||||||
|
|
||||||
DEVOPS_SCOPE = "https://app.vssps.visualstudio.com/.default"
|
DEVOPS_SCOPE = "https://app.vssps.visualstudio.com/.default"
|
||||||
DEVOPS_API_VERSION = "7.1"
|
DEVOPS_API_VERSION = "7.1"
|
||||||
|
|
||||||
# Get logger. It should be configured by the main application.
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
# log.setLevel(logging.DEBUG)
|
|
||||||
# log.propagate = False
|
|
||||||
|
|
||||||
# Define a class decorator
|
# Define a class decorator
|
||||||
def auto_properties(mapping: dict[str,str]):
|
def auto_properties(mapping: dict[str,str]):
|
||||||
@@ -83,7 +79,6 @@ class Organization():
|
|||||||
self._org_url = org_url.rstrip("/") + "/" # Ensure trailing slash
|
self._org_url = org_url.rstrip("/") + "/" # Ensure trailing slash
|
||||||
self._token = token
|
self._token = token
|
||||||
self._api_version = api_version
|
self._api_version = api_version
|
||||||
log_entity_creation(log, Organization, self._org_url)
|
|
||||||
|
|
||||||
def get_path(self, path: str, params: dict = {}) -> requests.Response:
|
def get_path(self, path: str, params: dict = {}) -> requests.Response:
|
||||||
return get_url(
|
return get_url(
|
||||||
@@ -131,8 +126,6 @@ class Project():
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
raise ValueError(f"Invalid project ID: {self._id}")
|
raise ValueError(f"Invalid project ID: {self._id}")
|
||||||
|
|
||||||
log_entity_creation(log, Project, self.id)
|
|
||||||
|
|
||||||
def get_auto_properties(self):
|
def get_auto_properties(self):
|
||||||
r = get_url(
|
r = get_url(
|
||||||
URL=f"{self._org._org_url}_apis/projects/{self._id}",
|
URL=f"{self._org._org_url}_apis/projects/{self._id}",
|
||||||
@@ -195,7 +188,6 @@ class Repository():
|
|||||||
|
|
||||||
# set other properties if provided
|
# set other properties if provided
|
||||||
self.from_args(**kwargs) # type: ignore[attr-defined]
|
self.from_args(**kwargs) # type: ignore[attr-defined]
|
||||||
log_entity_creation(log, Repository, self.id)
|
|
||||||
|
|
||||||
def get_auto_properties(self):
|
def get_auto_properties(self):
|
||||||
id = self._id if hasattr(self, "_id") else self._name # type: ignore[attr-defined]
|
id = self._id if hasattr(self, "_id") else self._name # type: ignore[attr-defined]
|
||||||
@@ -245,7 +237,6 @@ class Item():
|
|||||||
self.from_args(**kwargs) # type: ignore[attr-defined]
|
self.from_args(**kwargs) # type: ignore[attr-defined]
|
||||||
if "branch" in kwargs:
|
if "branch" in kwargs:
|
||||||
self._branch = kwargs.get("branch")
|
self._branch = kwargs.get("branch")
|
||||||
log_entity_creation(log, Item, self.path)
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def branch(self):
|
def branch(self):
|
||||||
@@ -15,7 +15,7 @@ Usage:
|
|||||||
curl -sH "Authorization: Bearer $ADO_TOKEN" "https://dev.azure.com/$ADO_ORGANIZATION_URL/_apis/projects?api-version=7.1"
|
curl -sH "Authorization: Bearer $ADO_TOKEN" "https://dev.azure.com/$ADO_ORGANIZATION_URL/_apis/projects?api-version=7.1"
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from sk.azure import get_token
|
from devops.azure import get_token
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
|||||||
20
harvester.py
20
harvester.py
@@ -1,18 +1,8 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import requests
|
from harvester.harvester import harvest_readmes
|
||||||
from sk.devops import Organization, Project, Repository, Item
|
|
||||||
from sk.azure import get_token
|
|
||||||
|
|
||||||
org = Organization("https://dev.azure.com/mcovsandbox", token=get_token())
|
if __name__ == "__main__":
|
||||||
|
harvest_readmes(
|
||||||
# Find all Markdown files in all projects and repositories
|
organization="mcovsandbox"
|
||||||
for project in org.projects:
|
)
|
||||||
for repo in project.repositories:
|
|
||||||
try:
|
|
||||||
root_item = Item(repository=repo, path="/")
|
|
||||||
md_files = root_item.get_child_items(pattern="*.md", recurse=True)
|
|
||||||
for md_file in md_files:
|
|
||||||
print(f"Project: {project.name}, Repo: {repo.name}, File: {md_file.path}")
|
|
||||||
except requests.exceptions.HTTPError as e:
|
|
||||||
print(f"Repository {repo.name} is empty.")
|
|
||||||
|
|||||||
1
harvester/__init__.py
Normal file
1
harvester/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
# Harvester Package
|
||||||
75
harvester/harvester.py
Normal file
75
harvester/harvester.py
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
import os
|
||||||
|
import requests
|
||||||
|
from devops.azure import get_token
|
||||||
|
from devops.devops import Organization, Project, Repository, Item
|
||||||
|
import logging
|
||||||
|
|
||||||
|
fmt = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
|
||||||
|
ch = logging.StreamHandler()
|
||||||
|
ch.setFormatter(fmt)
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
log.addHandler(ch)
|
||||||
|
log.setLevel(logging.INFO)
|
||||||
|
|
||||||
|
def sanitize_name(name: str) -> str:
|
||||||
|
"""Sanitize a name to be filesystem-friendly."""
|
||||||
|
return name.lower().replace(" ", "-").replace("_", "-")
|
||||||
|
|
||||||
|
def harvest_readmes(organization: str | Organization, branch: list[str | None] = ["main", "dev", None], projects: list[str] = [], output_path: str = "reference") -> None:
|
||||||
|
"""Harvest README files from repositories."""
|
||||||
|
if isinstance(organization, str):
|
||||||
|
org = Organization("https://dev.azure.com/" + organization, token=get_token())
|
||||||
|
else:
|
||||||
|
org = organization
|
||||||
|
|
||||||
|
if projects:
|
||||||
|
# Target specific projects
|
||||||
|
target_projects = [Project(org=org, name=project_name) for project_name in projects]
|
||||||
|
else:
|
||||||
|
# Target all projects
|
||||||
|
target_projects = org.projects
|
||||||
|
|
||||||
|
for project in target_projects:
|
||||||
|
repo_index = [] # Repository index for the project.
|
||||||
|
log.info(f"Processing project: {project.name} with {len(project.repositories)} repositories.") # type: ignore
|
||||||
|
for repo in project.repositories:
|
||||||
|
log.info(f"...processing repository: {repo.name}")
|
||||||
|
readme_found = False
|
||||||
|
# Try each specified branch to find the README.md file
|
||||||
|
for branch_name in branch:
|
||||||
|
try:
|
||||||
|
# Check if the README.md file exists
|
||||||
|
readme = Item(repository=repo, path="/README.md", branch=branch_name)
|
||||||
|
# Build output path and save the README content if found
|
||||||
|
if readme:
|
||||||
|
project_path = f"{output_path}/{sanitize_name(project.name)}" # type: ignore
|
||||||
|
# Create project directory if it doesn't exist
|
||||||
|
os.makedirs(project_path, exist_ok=True)
|
||||||
|
# Save README content to <sanitized-repo-name>.md inside the project directory
|
||||||
|
readme_content = readme.get_content(branch=branch_name)
|
||||||
|
if readme_content is None or len(readme_content.strip()) == 0:
|
||||||
|
continue
|
||||||
|
with open(f"{project_path}/{sanitize_name(repo.name)}.md", "w") as f:
|
||||||
|
f.write(readme_content.decode("utf-8"))
|
||||||
|
readme_found = True
|
||||||
|
break # Exit branch loop if README is found
|
||||||
|
except requests.exceptions.HTTPError:
|
||||||
|
# Repository does not have a README.md file in the specified branch
|
||||||
|
continue
|
||||||
|
# Record the repository in the index together with whether a README was found in any branch
|
||||||
|
repo_index.append((repo.name, readme_found)) # (repository name, whether a README was saved)
|
||||||
|
# Log if the README was not found
|
||||||
|
if not readme_found:
|
||||||
|
log.warning(f"......README.md in repo {repo.name} is not found or empty.")
|
||||||
|
# Save the repository index for the project
|
||||||
|
with open(f"{output_path}/{sanitize_name(project.name)}/index.md", "w") as index_file: # type: ignore
|
||||||
|
index_file.write(f"# Repository Index for Project: {project.name}\n\n") # type: ignore
|
||||||
|
for repo_name, has_readme in repo_index:
|
||||||
|
status = "" if has_readme else " - README.md not found"
|
||||||
|
index_file.write(f"- [{repo_name}]({sanitize_name(repo_name)}.md){status}\n")
|
||||||
|
|
||||||
|
# Save the reference index for all projects
|
||||||
|
with open(f"{output_path}/index.md", "w") as ref_index_file:
|
||||||
|
ref_index_file.write("# Project Index\n\n")
|
||||||
|
for project in target_projects:
|
||||||
|
ref_index_file.write(f"- [{project.name}]({sanitize_name(project.name)}/index.md)\n") # type: ignore
|
||||||
@@ -1,6 +1,8 @@
|
|||||||
#! /usr/bin/env bash
|
#! /usr/bin/env bash
|
||||||
|
|
||||||
python3 -m venv .venv
|
VERSION="${1:-3}"
|
||||||
|
|
||||||
|
python${VERSION} -m venv .venv
|
||||||
./.venv/bin/pip install --upgrade pip
|
./.venv/bin/pip install --upgrade pip
|
||||||
./.venv/bin/pip install -r requirements.txt
|
./.venv/bin/pip install -r requirements.txt
|
||||||
|
|
||||||
|
|||||||
4
mkdocs.yml
Normal file
4
mkdocs.yml
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
site_name: Reference Documentation
|
||||||
|
docs_dir: reference
|
||||||
|
theme:
|
||||||
|
name: material
|
||||||
11
pyproject.toml
Normal file
11
pyproject.toml
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
[build-system]
|
||||||
|
requires = ["setuptools", "wheel", "build"]
|
||||||
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
|
[project]
|
||||||
|
name = "devops"
|
||||||
|
version = "0.1.0"
|
||||||
|
requires-python = ">=3.12"
|
||||||
|
|
||||||
|
[tool.setuptools]
|
||||||
|
packages = ["devops"]
|
||||||
@@ -2,3 +2,4 @@ debugpy==1.8.17
|
|||||||
azure-identity==1.25.1
|
azure-identity==1.25.1
|
||||||
requests==2.32.5
|
requests==2.32.5
|
||||||
loki-logger-handler==1.1.2
|
loki-logger-handler==1.1.2
|
||||||
|
mkdocs-material>=1.5.2
|
||||||
|
|||||||
1
sk/__init__.py
Normal file
1
sk/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
# My helper tools
|
||||||
4
tests.py
4
tests.py
@@ -1,8 +1,8 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
import unittest
|
import unittest
|
||||||
import requests
|
import requests
|
||||||
from sk.devops import Organization, Repository, Project, Item
|
from devops.devops import Organization, Repository, Project, Item
|
||||||
from sk.azure import get_token
|
from devops.azure import get_token
|
||||||
|
|
||||||
# Get the token outside the test class to speed up tests.
|
# Get the token outside the test class to speed up tests.
|
||||||
# Each unit test instantiates the class, so doing it here avoids repeated authentication.
|
# Each unit test instantiates the class, so doing it here avoids repeated authentication.
|
||||||
|
|||||||
Reference in New Issue
Block a user