Compare commits

...

22 Commits

Author SHA1 Message Date
b3ebe386dc Add mkdocs-material dependency to requirements.txt 2026-02-03 19:18:44 +01:00
15dcf09ec1 Added ability to specify the Organization object or organization name for harvest_readmes function.
All checks were successful
/ unit-tests (push) Successful in 52s
2025-11-20 09:29:20 +01:00
ca16f1b098 Added minimal mkdocs config.
All checks were successful
/ unit-tests (push) Successful in 36s
2025-11-15 11:46:07 +01:00
4276939f30 Converted "sk" to package. 2025-11-15 11:42:35 +01:00
15349c9390 Allow specifying Python version for virtual environment creation 2025-11-15 11:42:13 +01:00
064364caa6 Added minimal MkDocs configuration. 2025-11-15 11:42:07 +01:00
b83f53d140 Minimal MkDocs configuration to display harvested docs. 2025-11-14 08:57:16 +01:00
32ee557f93 Fixed incorrect import.
All checks were successful
/ unit-tests (push) Successful in 44s
2025-11-12 09:03:27 +01:00
565464e266 Added harvester package with harvester module and harvest_readmes() function prototype.
All checks were successful
/ unit-tests (push) Successful in 10s
2025-11-09 20:55:05 +01:00
c3c5f9935c Added package build and develop instructions. 2025-11-09 19:29:09 +01:00
c29e0b4e21 Added PyProject file. 2025-11-09 19:24:53 +01:00
c32fc25cee Excluded Python package build artifacts from Git repository. 2025-11-09 19:24:33 +01:00
e6bca1ce47 Updates related to package name refactor.
All checks were successful
/ unit-tests (push) Successful in 10s
2025-11-09 19:21:56 +01:00
2addc85e40 Moved devops related code to a separate package. 2025-11-09 19:18:53 +01:00
5412c3ea09 Add DevOps OAuth2 flow details to README
All checks were successful
/ unit-tests (push) Successful in 10s
2025-11-09 18:45:10 +01:00
6e16cebeea Add branch support to Item class and enhance get_content method 2025-11-09 18:44:52 +01:00
d06bc05a2d Updated argument handling for get-token.py.
All checks were successful
/ unit-tests (push) Successful in 9s
2025-11-09 10:59:19 +01:00
495ba0b0b3 Modernized the get-token.py.
All checks were successful
/ unit-tests (push) Successful in 13s
2025-11-09 10:52:22 +01:00
7d5d451d0c Refactor harvester to list Markdown files across all projects and repositories
All checks were successful
/ unit-tests (push) Successful in 12s
2025-11-08 19:30:21 +01:00
8b2a06551a Modified harvester script to look for Python files in the test repository.
All checks were successful
/ unit-tests (push) Successful in 10s
2025-11-08 18:15:26 +01:00
89b9f8d6e6 Modified Item's get_child_items to accept pattern and recurse parameters to support looking for specific patterns in the repository. 2025-11-08 18:14:34 +01:00
d6b58f0b51 Added __getitem__ to allow easy directory traversal. 2025-11-08 17:50:51 +01:00
17 changed files with 227 additions and 50 deletions

8
.gitignore vendored
View File

@@ -1,6 +1,8 @@
# Python
.venv
__pycache__/
*.egg-info/
dist/
# Ignore sample JSON files
*.sample.json
@@ -12,7 +14,13 @@ prototype_*.py
*.secret
*.client_secret
# Environment files
*.env
# Certificate files
*.pem
*.key
*.crt
# Harvester output
reference

View File

@@ -2,5 +2,6 @@
"debug.autoExpandLazyVariables": "off",
"debug.inlineValues": "off",
"debugpy.debugJustMyCode": true,
"debugpy.showPythonInlineValues": false
"debugpy.showPythonInlineValues": false,
"python.terminal.useEnvFile": true
}

21
BUILD.md Normal file
View File

@@ -0,0 +1,21 @@
# Build Instructions
Run the following command to build the project:
```shell
python -m build
```
This will create distribution files in the `dist/` directory.
Install the built package using pip:
```shell
pip install --no-index dist/devops-0.1.0-py3-none-any.whl
```
Install in editable mode for development:
```shell
pip install -e .
```

9
DEVOPS.md Normal file
View File

@@ -0,0 +1,9 @@
# DevOps Notes
## DevOps OAuth2 Flow
Type: **oauth2**
Flow: **accessCode**
Authorization URL: `https://app.vssps.visualstudio.com/oauth2/authorize?response_type=Assertion`
Token URL: `https://app.vssps.visualstudio.com/oauth2/token?client_assertion_type=urn:ietf:params:oauth:client-assertion-type:jwt-bearer&grant_type=urn:ietf:params:oauth:grant-type:jwt-bearer`
Scopes: `vso.code`

1
devops/__init__.py Normal file
View File

@@ -0,0 +1 @@
# devops package

View File

@@ -59,9 +59,9 @@ def get_token(
def secret_credentials_auth(
scope: str = DEVOPS_SCOPE,
tenant_id: str = os.environ.get("AZURE_TENANT_ID", ""),
client_id: str = os.environ.get("AZURE_CLIENT_ID", ""),
client_secret: str = os.environ.get("AZURE_CLIENT_SECRET")
tenant_id = os.environ.get("AZURE_TENANT_ID", ""),
client_id = os.environ.get("AZURE_CLIENT_ID", ""),
client_secret = os.environ.get("AZURE_CLIENT_SECRET")
) -> str:
"""
Authenticate using client credentials. Pass credentials via environment variables,
@@ -92,8 +92,8 @@ def certificate_credentials_auth(
# Load the private key (RSA)
with open(pem_path, "rb") as f:
pem = f.read()
key_pem = re.search(b"-----BEGIN (?:RSA )?PRIVATE KEY-----.*?END (?:RSA )?PRIVATE KEY-----", pem, re.S).group(0)
cert_pem = re.search(b"-----BEGIN CERTIFICATE-----.*?END CERTIFICATE-----", pem, re.S).group(0)
key_pem = re.search(b"-----BEGIN (?:RSA )?PRIVATE KEY-----.*?END (?:RSA )?PRIVATE KEY-----", pem, re.S).group(0) # type: ignore
cert_pem = re.search(b"-----BEGIN CERTIFICATE-----.*?END CERTIFICATE-----", pem, re.S).group(0) # type: ignore
private_key = serialization.load_pem_private_key(key_pem, password=None)
cert = x509.load_pem_x509_certificate(cert_pem)
@@ -115,7 +115,7 @@ def certificate_credentials_auth(
headers = {"x5t": x5t, "kid": x5t}
assertion = jwt.encode(claims, private_key, algorithm="RS256", headers=headers)
assertion = jwt.encode(claims, private_key, algorithm="RS256", headers=headers) # type: ignore
data = {
"grant_type": "client_credentials",

View File

@@ -1,17 +1,14 @@
from __future__ import annotations
import pathlib
import requests
import urllib.parse
from uuid import UUID
import logging
from sk.logger import log_entity_creation
DEVOPS_SCOPE = "https://app.vssps.visualstudio.com/.default"
DEVOPS_API_VERSION = "7.1"
# Get logger. It should be configured by the main application.
log = logging.getLogger(__name__)
# log.setLevel(logging.DEBUG)
# log.propagate = False
# Define a class decorator
def auto_properties(mapping: dict[str,str]):
@@ -82,7 +79,6 @@ class Organization():
self._org_url = org_url.rstrip("/") + "/" # Ensure trailing slash
self._token = token
self._api_version = api_version
log_entity_creation(log, Organization, self._org_url)
def get_path(self, path: str, params: dict = {}) -> requests.Response:
return get_url(
@@ -130,8 +126,6 @@ class Project():
except ValueError:
raise ValueError(f"Invalid project ID: {self._id}")
log_entity_creation(log, Project, self.id)
def get_auto_properties(self):
r = get_url(
URL=f"{self._org._org_url}_apis/projects/{self._id}",
@@ -194,7 +188,6 @@ class Repository():
# set other properties if provided
self.from_args(**kwargs) # type: ignore[attr-defined]
log_entity_creation(log, Repository, self.id)
def get_auto_properties(self):
id = self._id if hasattr(self, "_id") else self._name # type: ignore[attr-defined]
@@ -242,7 +235,14 @@ class Item():
def __init__(self, repository: Repository, **kwargs):
self._repository = repository
self.from_args(**kwargs) # type: ignore[attr-defined]
log_entity_creation(log, Item, self.path)
if "branch" in kwargs:
self._branch = kwargs.get("branch")
@property
def branch(self):
    """Return the branch stored on this item, or ``None`` when none was stored."""
    # `_branch` is only assigned in __init__ when a "branch" keyword is passed,
    # so the attribute may legitimately be absent.
    return getattr(self, "_branch", None)
def get_auto_properties(self):
r = self._repository._project.organization.get_path(
@@ -255,17 +255,28 @@ class Item():
)
self.from_json(r.json()) # type: ignore[attr-defined]
def get_content(self) -> bytes:
"""Get the content of the item if it is a file."""
def get_content(self, branch: str | None = None, commit: str | None = None, tag: str | None = None) -> bytes:
"""Get the content of the item with optional branch, commit, or tag."""
if self.git_object_type != "blob": # type: ignore[attr-defined]
raise ValueError("Content can only be fetched for blob items.")
params = { "path": self.path, "recursionLevel": "none" }
if self.branch and branch is None:
branch = self.branch
if branch:
params["version"] = branch
params["versionType"] = "branch"
elif tag:
params["version"] = tag
params["versionType"] = "tag"
elif commit:
params["version"] = commit
params["versionType"] = "commit"
r = self._repository._project.organization.get_path(
path=f"{self._repository._project.id}/_apis/git/repositories/{self._repository.id}/items",
params={
"path": self.path,
"recursionLevel": "none"
}
params=params
)
return r.content
@@ -273,7 +284,7 @@ class Item():
def path(self):
return self._path # type: ignore[attr-defined]
def get_child_items(self) -> list[Item]:
def get_child_items(self, pattern: str | None = None, recurse: bool = False) -> list[Item]:
"""Get child items if this item is a folder."""
if self.git_object_type != "tree": # type: ignore[attr-defined]
raise ValueError("Child items can only be fetched for folder items.")
@@ -283,12 +294,15 @@ class Item():
path=f"{self._repository.project.id}/_apis/git/repositories/{self._repository.id}/items",
params={
"scopePath": self.path,
"recursionLevel": "oneLevel"
"recursionLevel": "oneLevel" if not recurse else "full"
}
).json().get("value", [])
child_items = []
for obj in objects:
i = Item(repository=self._repository, path=obj.get("path"))
obj_path = obj.get("path")
if pattern and not pathlib.PurePath(obj_path).match(pattern):
continue
i = Item(repository=self._repository, path=obj_path)
i.from_json(obj) # type: ignore[attr-defined]
child_items.append(i)
return child_items
@@ -299,5 +313,15 @@ class Item():
self._children = self.get_child_items()
return self._children
def __getitem__(self, key: str) -> Item:
if self.git_object_type != "tree": # type: ignore[attr-defined]
raise ValueError("Child items can only be accessed for folder items.")
if not key.startswith("/"):
key = pathlib.Path(self.path).joinpath(key).absolute().as_posix()
for child in self.children:
if child.path == key:
return child
raise KeyError(f"Child item with path '{key}' not found.")
def __str__(self):
return f"Item(path=\"{self._path}\" type={self.git_object_type})" # type: ignore[attr-defined]

View File

@@ -12,17 +12,43 @@ Usage:
Now you can use the ADO_TOKEN environment variable, for example using curl:
curl -H "Authorization: Bearer $ADO_TOKEN" https://dev.azure.com/your_organization/_apis/projects?api-version=7.1
curl -sH "Authorization: Bearer $ADO_TOKEN" "https://dev.azure.com/$ADO_ORGANIZATION_URL/_apis/projects?api-version=7.1"
"""
from sk.azure import get_token
from devops.azure import get_token
from argparse import ArgumentParser
import os
args = ArgumentParser(description="Get Azure DevOps token and print it for exporting as environment variable.")
args.add_argument("--tenant-id", type=str, default=os.getenv("AZURE_TENANT_ID"), help="Azure AD Tenant ID")
args.add_argument("--client-id", type=str, default=os.getenv("AZURE_CLIENT_ID"), help="Azure AD Client ID")
args.add_argument("--pem-path", type=str, default=os.getenv("AZURE_CLIENT_CERTIFICATE_PATH"), help="Path to PEM file for authentication (optional)")
args.add_argument("--client-secret", type=str, default=os.getenv("AZURE_CLIENT_SECRET"), help="Client Secret for authentication (optional)")
args = args.parse_args()
if not args.tenant_id or not args.client_id:
print("Tenant ID and Client ID are required.")
exit(1)
if args.pem_path and os.path.isfile(args.pem_path):
token = get_token(
tenant_id="a7740229-47b6-45de-ad22-83721462b1bf",
client_id="840671c4-5dc4-40e5-aab9-7c3a07bbd652",
pem_path="./slawek-mba.pem"
tenant_id=args.tenant_id,
client_id=args.client_id,
pem_path=args.pem_path
)
elif args.client_secret:
if not args.client_secret:
print("Client secret file is empty.")
exit(1)
token = get_token(
tenant_id=args.tenant_id,
client_id=args.client_id,
client_secret=args.client_secret
)
else:
token = get_token(
tenant_id=args.tenant_id,
client_id=args.client_id
)
# print(f"Obtained token: {token}")
print(f"export ADO_TOKEN='{token}'")

View File

@@ -1,16 +1,8 @@
#!/usr/bin/env python3
from sk.devops import Organization
from sk.azure import get_token
from harvester.harvester import harvest_readmes
token = get_token(
tenant_id="a7740229-47b6-45de-ad22-83721462b1bf",
client_id="840671c4-5dc4-40e5-aab9-7c3a07bbd652",
pem_path="./slawek-mba.pem"
if __name__ == "__main__":
harvest_readmes(
organization="mcovsandbox"
)
org = Organization("https://dev.azure.com/mcovsandbox", token=token)
# print(org.projects["bafe0cf1-6c97-4088-864a-ea6dc02b2727"].repositories["feac266f-84d2-41bc-839b-736925a85eaa"].items["/generate-pat.py"])
print(org["ADO Sandbox"]["ado-auth-lab"]["/container"].url) # type: ignore[attr-defined]
print(org["ADO Sandbox"]["ado-auth-lab"]["/generate-pat.py"].url) # type: ignore[attr-defined]

1
harvester/__init__.py Normal file
View File

@@ -0,0 +1 @@
# Harvester Package

75
harvester/harvester.py Normal file
View File

@@ -0,0 +1,75 @@
import os
import requests
from devops.azure import get_token
from devops.devops import Organization, Project, Repository, Item
import logging
# Module logger: INFO and above, written through a dedicated stream handler
# using a "<timestamp> - <level> - <message>" layout.
log = logging.getLogger(__name__)
log.setLevel(logging.INFO)
fmt = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
ch = logging.StreamHandler()
ch.setFormatter(fmt)
log.addHandler(ch)
def sanitize_name(name: str) -> str:
    """Return *name* lower-cased, with spaces and underscores turned into hyphens.

    Any other character is left untouched.
    """
    to_hyphen = str.maketrans({" ": "-", "_": "-"})
    return name.lower().translate(to_hyphen)
def harvest_readmes(organization: str | Organization, branch: list[str | None] | None = None, projects: list[str] | None = None, output_path: str = "reference") -> None:
    """Harvest README.md files from repositories into a tree of Markdown files.

    For every targeted project, each repository is probed branch by branch and
    the first non-empty ``/README.md`` is saved as
    ``<output_path>/<project>/<repository>.md``. A per-project ``index.md``
    lists all repositories (flagging those without a README) and
    ``<output_path>/index.md`` links to every project index.

    Args:
        organization: An ``Organization`` instance, or an organization name
            that is appended to ``https://dev.azure.com/`` and authenticated
            with ``get_token()``.
        branch: Branch names to try, in order. A ``None`` entry is passed on
            unchanged, i.e. no explicit branch is requested. Defaults to
            ``["main", "dev", None]``.
        projects: Names of the projects to harvest. When empty or omitted,
            all projects of the organization are processed.
        output_path: Directory that receives the generated files.
    """
    # Resolve defaults here rather than in the signature to avoid sharing
    # mutable default lists between calls.
    branches = ["main", "dev", None] if branch is None else branch

    if isinstance(organization, str):
        org = Organization("https://dev.azure.com/" + organization, token=get_token())
    else:
        org = organization

    if projects:
        # Target specific projects
        target_projects = [Project(org=org, name=project_name) for project_name in projects]
    else:
        # Target all projects
        target_projects = org.projects

    # Make sure the output root exists so the top-level index can be written
    # even when there is nothing to harvest.
    os.makedirs(output_path, exist_ok=True)

    for project in target_projects:
        project_path = f"{output_path}/{sanitize_name(project.name)}"  # type: ignore
        # Create the project directory unconditionally: the project index is
        # written below even when none of the repositories has a README.
        os.makedirs(project_path, exist_ok=True)

        repo_index = []  # (repository name, README found?) pairs for the project index.
        log.info("Processing project: %s with %s repositories.", project.name, len(project.repositories))  # type: ignore
        for repo in project.repositories:
            log.info("...processing repository: %s", repo.name)
            readme_content = _find_readme(repo, branches)
            readme_found = readme_content is not None
            if readme_found:
                with open(f"{project_path}/{sanitize_name(repo.name)}.md", "w", encoding="utf-8") as f:
                    f.write(readme_content.decode("utf-8"))
            else:
                log.warning("......README.md in repo %s is not found or empty.", repo.name)
            repo_index.append((repo.name, readme_found))

        # Save the repository index for the project
        with open(f"{project_path}/index.md", "w", encoding="utf-8") as index_file:
            index_file.write(f"# Repository Index for Project: {project.name}\n\n")  # type: ignore
            for repo_name, has_readme in repo_index:
                status = "" if has_readme else " - README.md not found"
                index_file.write(f"- [{repo_name}]({sanitize_name(repo_name)}.md){status}\n")

    # Save the reference index for all projects
    with open(f"{output_path}/index.md", "w", encoding="utf-8") as ref_index_file:
        ref_index_file.write("# Project Index\n\n")
        for project in target_projects:
            ref_index_file.write(f"- [{project.name}]({sanitize_name(project.name)}/index.md)\n")  # type: ignore


def _find_readme(repo: Repository, branches: list[str | None]) -> bytes | None:
    """Return the first non-empty ``/README.md`` content found on *branches*, else ``None``."""
    for branch_name in branches:
        try:
            readme = Item(repository=repo, path="/README.md", branch=branch_name)
            if not readme:
                continue
            content = readme.get_content(branch=branch_name)
        except requests.exceptions.HTTPError:
            # Treated as "no README.md on this branch": try the next candidate.
            continue
        # An empty or whitespace-only README counts as missing.
        if content is not None and content.strip():
            return content
    return None

View File

@@ -1,6 +1,8 @@
#! /usr/bin/env bash
python3 -m venv .venv
VERSION="${1:-3}"
python${VERSION} -m venv .venv
./.venv/bin/pip install --upgrade pip
./.venv/bin/pip install -r requirements.txt

4
mkdocs.yml Normal file
View File

@@ -0,0 +1,4 @@
site_name: Reference Documentation
docs_dir: reference
theme:
name: material

11
pyproject.toml Normal file
View File

@@ -0,0 +1,11 @@
[build-system]
requires = ["setuptools", "wheel", "build"]
build-backend = "setuptools.build_meta"
[project]
name = "devops"
version = "0.1.0"
requires-python = ">=3.12"
[tool.setuptools]
packages = ["devops"]

View File

@@ -2,3 +2,4 @@ debugpy==1.8.17
azure-identity==1.25.1
requests==2.32.5
loki-logger-handler==1.1.2
mkdocs-material>=1.5.2

1
sk/__init__.py Normal file
View File

@@ -0,0 +1 @@
# My helper tools

View File

@@ -1,8 +1,8 @@
#!/usr/bin/env python3
import unittest
import requests
from sk.devops import Organization, Repository, Project, Item
from sk.azure import get_token
from devops.devops import Organization, Repository, Project, Item
from devops.azure import get_token
# Get the token outside the test class to speed up tests.
# Each unit test instantiates the class, so doing it here avoids repeated authentication.