11 Commits
v3.5.2 ... main

Author SHA1 Message Date
Yakhyokhuja Valikhujaev
7882ec5cb4 chore: Update docstrings and comments (#119) 2026-05-11 01:07:14 +09:00
dependabot[bot]
d51d030545 chore(deps): bump gitpython from 3.1.49 to 3.1.50 (#118)
Bumps [gitpython](https://github.com/gitpython-developers/GitPython) from 3.1.49 to 3.1.50.
- [Release notes](https://github.com/gitpython-developers/GitPython/releases)
- [Changelog](https://github.com/gitpython-developers/GitPython/blob/main/CHANGES)
- [Commits](https://github.com/gitpython-developers/GitPython/compare/3.1.49...3.1.50)

---
updated-dependencies:
- dependency-name: gitpython
  dependency-version: 3.1.50
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-05-09 19:16:08 +09:00
github-actions[bot]
5a767847da chore: Release v3.6.0 2026-05-08 12:25:34 +00:00
github-actions[bot]
4a22f903f0 chore: Release v3.6.0rc2 2026-05-08 12:20:29 +00:00
Yakhyokhuja Valikhujaev
43a46e11df ci: Refresh uv.lock during release pipeline (#117) 2026-05-08 21:13:44 +09:00
github-actions[bot]
025b93ab8b chore: Release v3.6.0rc1 2026-05-08 03:27:30 +00:00
Yakhyokhuja Valikhujaev
8bf87d958f feat: Add PIPNet for facial landmarks detection (#116)
* docs: Add PipNet model documentation

* feat: Add PipNet for face landmark detection
2026-05-08 12:25:00 +09:00
Yakhyokhuja Valikhujaev
b813dc2ee7 ref: Update package mngt and optimize the vector store functions (#115)
* ref: Update download and hash chunk sizes to speed up

* build: Adopt uv with uv.lock and drop requirements.txt

* ref: Centralize softmax helper and minor cleanups
2026-05-06 01:47:27 +09:00
Yakhyokhuja Valikhujaev
73fc291930 ci: Resolve deprecation warnings in pipeline (#114)
* ci: Resolve deprecation warnings in pipeline
2026-04-28 00:52:46 +09:00
github-actions[bot]
400bb72217 chore: Release v3.5.3 2026-04-27 15:24:18 +00:00
Yakhyokhuja Valikhujaev
a0a12d5eca fix: Fix pypi publish re-run issue (#113) 2026-04-28 00:22:12 +09:00
45 changed files with 2724 additions and 473 deletions

View File

@@ -17,8 +17,8 @@ jobs:
runs-on: ubuntu-latest
timeout-minutes: 5
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
- uses: actions/checkout@v5
- uses: actions/setup-python@v6
with:
python-version: "3.11"
- uses: pre-commit/action@v3.0.1
@@ -50,28 +50,25 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@v5
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
enable-cache: true
python-version: ${{ matrix.python-version }}
cache: "pip"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install ".[cpu,dev]"
run: uv sync --locked --extra cpu --extra dev
- name: Check ONNX Runtime providers
run: |
python -c "import onnxruntime as ort; print('Available providers:', ort.get_available_providers())"
run: uv run python -c "import onnxruntime as ort; print('Available providers:', ort.get_available_providers())"
- name: Run tests
run: pytest -v --tb=short
run: uv run pytest -v --tb=short
- name: Test package imports
run: python -c "import uniface; print(f'uniface {uniface.__version__} loaded with {len(uniface.__all__)} exports')"
run: uv run python -c "import uniface; print(f'uniface {uniface.__version__} loaded with {len(uniface.__all__)} exports')"
build:
runs-on: ubuntu-latest
@@ -80,10 +77,10 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@v5
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: "3.11"
cache: "pip"

View File

@@ -1,8 +1,6 @@
name: Deploy Documentation
on:
release:
types: [released]
workflow_dispatch:
permissions:
@@ -12,26 +10,28 @@ jobs:
deploy:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5
with:
fetch-depth: 0
- uses: actions/setup-python@v5
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
enable-cache: true
python-version: "3.11"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install mkdocs-material pymdown-extensions mkdocs-git-committers-plugin-2 mkdocs-git-revision-date-localized-plugin
run: uv sync --locked --extra docs
- name: Build docs
env:
MKDOCS_GIT_COMMITTERS_APIKEY: ${{ secrets.MKDOCS_GIT_COMMITTERS_APIKEY }}
run: mkdocs build --strict
run: uv run mkdocs build --strict
- name: Deploy to GitHub Pages
uses: peaceiris/actions-gh-pages@v4
env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
publish_dir: ./site

View File

@@ -20,10 +20,10 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@v5
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: "3.11"
@@ -66,21 +66,19 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@v5
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
enable-cache: true
python-version: ${{ matrix.python-version }}
cache: 'pip'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install ".[cpu,dev]"
run: uv sync --locked --extra cpu --extra dev
- name: Run tests
run: pytest -v --tb=short
run: uv run pytest -v --tb=short
release:
runs-on: ubuntu-latest
@@ -91,13 +89,13 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@v5
with:
fetch-depth: 0
token: ${{ secrets.RELEASE_TOKEN }}
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: "3.11"
@@ -125,11 +123,20 @@ jobs:
p.write_text(new)
EOF
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
enable-cache: true
python-version: "3.11"
- name: Refresh uv.lock with new project version
run: uv lock --upgrade-package uniface
- name: Commit, tag, push
run: |
git config user.name "github-actions[bot]"
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
git add pyproject.toml uniface/__init__.py
git add pyproject.toml uniface/__init__.py uv.lock
git commit -m "chore: Release v${{ inputs.version }}"
git tag "v${{ inputs.version }}"
git push origin HEAD:${{ github.ref_name }}
@@ -148,12 +155,12 @@ jobs:
steps:
- name: Checkout tag
uses: actions/checkout@v4
uses: actions/checkout@v5
with:
ref: v${{ inputs.version }}
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: "3.11"
cache: 'pip'
@@ -176,7 +183,7 @@ jobs:
run: twine upload dist/*
- name: Create GitHub Release
uses: softprops/action-gh-release@v1
uses: softprops/action-gh-release@v2
with:
tag_name: v${{ inputs.version }}
files: dist/*
@@ -193,28 +200,29 @@ jobs:
steps:
- name: Checkout tag
uses: actions/checkout@v4
uses: actions/checkout@v5
with:
ref: v${{ inputs.version }}
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@v5
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
enable-cache: true
python-version: "3.11"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install mkdocs-material pymdown-extensions mkdocs-git-committers-plugin-2 mkdocs-git-revision-date-localized-plugin
run: uv sync --locked --extra docs
- name: Build docs
env:
MKDOCS_GIT_COMMITTERS_APIKEY: ${{ secrets.MKDOCS_GIT_COMMITTERS_APIKEY }}
run: mkdocs build --strict
run: uv run mkdocs build --strict
- name: Deploy to GitHub Pages
uses: peaceiris/actions-gh-pages@v4
env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
publish_dir: ./site

View File

@@ -1,130 +0,0 @@
name: Publish to PyPI
on:
push:
tags:
- "v*.*.*"
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
validate:
runs-on: ubuntu-latest
timeout-minutes: 5
outputs:
version: ${{ steps.get_version.outputs.version }}
tag_version: ${{ steps.get_version.outputs.tag_version }}
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11" # Needs 3.11+ for tomllib
- name: Get version from tag and pyproject.toml
id: get_version
run: |
TAG_VERSION=${GITHUB_REF#refs/tags/v}
echo "tag_version=$TAG_VERSION" >> $GITHUB_OUTPUT
PYPROJECT_VERSION=$(python -c "import tomllib; print(tomllib.load(open('pyproject.toml','rb'))['project']['version'])")
echo "version=$PYPROJECT_VERSION" >> $GITHUB_OUTPUT
echo "Tag version: v$TAG_VERSION"
echo "pyproject.toml version: $PYPROJECT_VERSION"
- name: Verify version match
run: |
if [ "${{ steps.get_version.outputs.tag_version }}" != "${{ steps.get_version.outputs.version }}" ]; then
echo "Error: Tag version (${{ steps.get_version.outputs.tag_version }}) does not match pyproject.toml version (${{ steps.get_version.outputs.version }})"
exit 1
fi
echo "Version validation passed: ${{ steps.get_version.outputs.version }}"
test:
runs-on: ubuntu-latest
timeout-minutes: 15
needs: validate
strategy:
fail-fast: false
matrix:
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install ".[cpu,dev]"
- name: Run tests
run: pytest -v
publish:
runs-on: ubuntu-latest
timeout-minutes: 10
needs: [validate, test]
permissions:
contents: write
id-token: write
environment:
name: pypi
url: https://pypi.org/project/uniface/
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
cache: 'pip'
- name: Install build tools
run: |
python -m pip install --upgrade pip
python -m pip install build twine
- name: Build package
run: python -m build
- name: Check package
run: twine check dist/*
- name: Publish to PyPI
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
run: twine upload dist/*
- name: Detect pre-release
id: prerelease
run: |
if [[ "${{ needs.validate.outputs.version }}" =~ (a|b|rc|\.dev)[0-9]+ ]]; then
echo "is_prerelease=true" >> $GITHUB_OUTPUT
else
echo "is_prerelease=false" >> $GITHUB_OUTPUT
fi
- name: Create GitHub Release
uses: softprops/action-gh-release@v1
with:
token: ${{ secrets.RELEASE_TOKEN }}
files: dist/*
generate_release_notes: true
prerelease: ${{ steps.prerelease.outputs.is_prerelease }}

View File

@@ -1,84 +0,0 @@
name: Release
on:
workflow_dispatch:
inputs:
version:
description: 'Version (e.g. 3.6.0, 3.6.0b1, 3.6.0rc1)'
required: true
concurrency:
group: release
cancel-in-progress: false
jobs:
release:
runs-on: ubuntu-latest
timeout-minutes: 5
permissions:
contents: write
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
token: ${{ secrets.RELEASE_TOKEN }}
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Validate version (PEP 440)
run: |
python - <<'EOF'
import re, sys
v = "${{ inputs.version }}"
# PEP 440 subset: X.Y.Z, optional aN / bN / rcN / .devN
if not re.fullmatch(r'\d+\.\d+\.\d+((a|b|rc)\d+|\.dev\d+)?', v):
print(f"Invalid version: {v}")
print("Expected forms: 3.6.0, 3.6.0a1, 3.6.0b1, 3.6.0rc1, 3.6.0.dev1")
sys.exit(1)
EOF
- name: Check tag does not exist
run: |
if git rev-parse "v${{ inputs.version }}" >/dev/null 2>&1; then
echo "Tag v${{ inputs.version }} already exists."
exit 1
fi
- name: Update pyproject.toml
run: |
python - <<'EOF'
import re, pathlib
p = pathlib.Path('pyproject.toml')
text = p.read_text()
new = re.sub(r'^version\s*=\s*".*"', f'version = "${{ inputs.version }}"', text, count=1, flags=re.M)
if new == text:
raise SystemExit("Failed to update version in pyproject.toml")
p.write_text(new)
EOF
- name: Update uniface/__init__.py
run: |
python - <<'EOF'
import re, pathlib
p = pathlib.Path('uniface/__init__.py')
text = p.read_text()
new = re.sub(r"^__version__\s*=\s*'.*'", f"__version__ = '${{ inputs.version }}'", text, count=1, flags=re.M)
if new == text:
raise SystemExit("Failed to update __version__ in uniface/__init__.py")
p.write_text(new)
EOF
- name: Commit, tag, push
run: |
git config user.name "github-actions[bot]"
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
git add pyproject.toml uniface/__init__.py
git commit -m "chore: Release v${{ inputs.version }}"
git tag "v${{ inputs.version }}"
git push origin HEAD:${{ github.ref_name }}
git push origin "v${{ inputs.version }}"

View File

@@ -21,25 +21,31 @@ Thank you for considering contributing to UniFace! We welcome contributions of a
## Development Setup
We use [uv](https://docs.astral.sh/uv/) for reproducible dev installs. The committed `uv.lock` pins every transitive dependency so contributors and CI resolve to identical versions.
```bash
# Install uv (https://docs.astral.sh/uv/getting-started/installation/)
curl -LsSf https://astral.sh/uv/install.sh | sh
git clone https://github.com/yakhyo/uniface.git
cd uniface
pip install -e ".[dev]"
# Sync runtime + cpu + dev extras from uv.lock (use --extra gpu instead of cpu for CUDA)
uv sync --extra cpu --extra dev
```
`uv sync` creates a project-local `.venv/` and installs everything pinned in `uv.lock`. Run commands with `uv run <cmd>` (e.g. `uv run pytest`), or activate the venv with `source .venv/bin/activate`.
### Setting Up Pre-commit Hooks
We use [pre-commit](https://pre-commit.com/) to ensure code quality and consistency. Install and configure it:
We use [pre-commit](https://pre-commit.com/) to ensure code quality and consistency. `pre-commit` is included in the `[dev]` extra, so it's already installed after `uv sync`.
```bash
# Install pre-commit
pip install pre-commit
# Install the git hooks
pre-commit install
uv run pre-commit install
# (Optional) Run against all files
pre-commit run --all-files
uv run pre-commit run --all-files
```
Once installed, pre-commit will automatically run on every commit to check:
@@ -194,7 +200,7 @@ Releases are fully automated via GitHub Actions. Only maintainers with branch-pr
### Cutting a release
1. Go to **Actions → Release → Run workflow** on GitHub.
1. Go to **Actions → Release Pipeline → Run workflow** on GitHub.
2. Enter the version following [PEP 440](https://peps.python.org/pep-0440/):
- Stable: `0.7.0`, `1.0.0`
- Pre-release: `0.7.0rc1`, `0.7.0b1`, `0.7.0a1`, `0.7.0.dev1`
@@ -202,17 +208,13 @@ Releases are fully automated via GitHub Actions. Only maintainers with branch-pr
### What happens automatically
The `Release` workflow:
The `Release Pipeline` workflow runs all stages in sequence:
1. Validates the version string.
2. Updates `pyproject.toml` and `uniface/__init__.py`.
3. Commits `chore: Release vX.Y.Z` to `main`.
4. Creates and pushes tag `vX.Y.Z`.
Pushing the tag then triggers:
- **Publish to PyPI** — builds the package, runs tests on Python 3.10–3.14, uploads to PyPI, and creates a GitHub Release (flagged as pre-release for `a`/`b`/`rc`/`.dev` versions).
- **Deploy docs** — fires only after a **stable** GitHub Release is published. Pre-releases do not update the live documentation site.
1. **Validate** — checks the version string against PEP 440 and confirms the tag does not already exist (see the sketch after this list).
2. **Test** — runs the test suite on Python 3.10–3.14.
3. **Release** — updates `pyproject.toml` and `uniface/__init__.py`, commits `chore: Release vX.Y.Z` to `main`, creates and pushes tag `vX.Y.Z`.
4. **Publish** — builds the package, uploads to PyPI, and creates a GitHub Release (flagged as pre-release for `a`/`b`/`rc`/`.dev` versions).
5. **Deploy docs** — runs only for **stable** versions. Pre-releases do not update the live documentation site.
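For reference, a minimal sketch of the version checks in steps 1 and 4, reusing the exact regular expressions from the workflow (the function names here are illustrative only):

```python
import re

def is_valid_version(v: str) -> bool:
    # PEP 440 subset enforced by the Validate stage: X.Y.Z with optional aN / bN / rcN / .devN
    return re.fullmatch(r'\d+\.\d+\.\d+((a|b|rc)\d+|\.dev\d+)?', v) is not None

def is_prerelease(v: str) -> bool:
    # Pattern the Publish stage uses to flag pre-releases
    return re.search(r'(a|b|rc|\.dev)[0-9]+', v) is not None

assert is_valid_version('3.6.0') and not is_prerelease('3.6.0')
assert is_valid_version('3.6.0rc1') and is_prerelease('3.6.0rc1')
assert not is_valid_version('3.6')
```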
### Verifying a release

View File

@@ -28,7 +28,7 @@
- **Face Detection** — RetinaFace, SCRFD, YOLOv5-Face, and YOLOv8-Face with 5-point landmarks
- **Face Recognition** — AdaFace, ArcFace, EdgeFace, MobileFace, and SphereFace embeddings
- **Face Tracking** — Multi-object tracking with [BYTETracker](https://github.com/yakhyo/bytetrack-tracker) for persistent IDs across video frames
- **Facial Landmarks** — 106-point landmark localization module (separate from 5-point detector landmarks)
- **Facial Landmarks** — 106-point (2d106det) and 98 / 68-point (PIPNet) landmark localization (separate from the 5-point detector landmarks)
- **Face Parsing** — BiSeNet semantic segmentation (19 classes), XSeg face masking
- **Portrait Matting** — Trimap-free alpha matte with MODNet (background removal, green screen, compositing)
- **Gaze Estimation** — Real-time gaze direction with MobileGaze
@@ -276,6 +276,7 @@ https://yakhyo.github.io/uniface/concepts/execution-providers/
| Recognition | WebFace600K | ArcFace |
| Recognition | WebFace4M / 12M | AdaFace |
| Recognition | MS1MV2 | EdgeFace |
| Landmarks | WFLW, 300W+CelebA | PIPNet (98 / 68 pts) |
| Gaze | Gaze360 | MobileGaze |
| Head Pose | 300W-LP | HeadPose (ResNet, MobileNet) |
| Parsing | CelebAMask-HQ | BiSeNet |
@@ -308,6 +309,7 @@ If you plan commercial use, verify model license compatibility.
| Tracking | [bytetrack-tracker](https://github.com/yakhyo/bytetrack-tracker) | - | BYTETracker Multi-Object Tracking |
| Recognition | [face-recognition](https://github.com/yakhyo/face-recognition) | ✓ | MobileFace, SphereFace Training |
| Recognition | [edgeface-onnx](https://github.com/yakhyo/edgeface-onnx) | - | EdgeFace ONNX Inference |
| Landmarks | [pipnet-onnx](https://github.com/yakhyo/pipnet-onnx) | - | PIPNet 98 / 68-point ONNX Inference |
| Parsing | [face-parsing](https://github.com/yakhyo/face-parsing) | ✓ | BiSeNet Face Parsing |
| Parsing | [face-segmentation](https://github.com/yakhyo/face-segmentation) | - | XSeg Face Segmentation |
| Gaze | [gaze-estimation](https://github.com/yakhyo/gaze-estimation) | ✓ | MobileGaze Training |

View File

@@ -110,6 +110,28 @@ landmarks = landmarker.get_landmarks(image, face.bbox)
| 63-86 | Eyes | 24 |
| 87-105 | Mouth | 19 |
### 98 / 68-Point Landmarks (PIPNet)
Returned by `PIPNet`. The variant determines the layout:
```python
from uniface.constants import PIPNetWeights
from uniface.landmark import PIPNet
# 98-point WFLW layout (default)
landmarks = PIPNet().get_landmarks(image, face.bbox)
# Shape: (98, 2)
# 68-point 300W layout
landmarks = PIPNet(model_name=PIPNetWeights.DW300_CELEBA_68).get_landmarks(image, face.bbox)
# Shape: (68, 2)
```
The 98-point output follows the standard [WFLW](https://wywu.github.io/projects/LAB/WFLW.html) layout
(33 face-contour points, eyebrow/eye/nose/mouth groups). The 68-point output follows the standard
[300W / iBUG](https://ibug.doc.ic.ac.uk/resources/300-W/) layout. Coordinates are in original-image
pixel space, identical in convention to `Landmark106`.
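As a short illustration, assuming the standard WFLW ordering in which the 33 contour points come first (indices 0–32), the contour can be drawn directly in image space:

```python
import cv2

from uniface.detection import RetinaFace
from uniface.landmark import PIPNet

image = cv2.imread("photo.jpg")
faces = RetinaFace().detect(image)
if faces:
    landmarks = PIPNet().get_landmarks(image, faces[0].bbox)  # (98, 2), WFLW layout
    for x, y in landmarks[:33].astype(int):  # indices 0-32: face contour
        cv2.circle(image, (int(x), int(y)), 2, (0, 255, 0), -1)
    cv2.imwrite("contour.jpg", image)
```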
---
## Face Crop

View File

@@ -43,7 +43,7 @@ All **ONNX-based** model classes accept the `providers` parameter:
- Detection: `RetinaFace`, `SCRFD`, `YOLOv5Face`, `YOLOv8Face`
- Recognition: `ArcFace`, `AdaFace`, `MobileFace`, `SphereFace`
- Landmarks: `Landmark106`
- Landmarks: `Landmark106`, `PIPNet`
- Gaze: `MobileGaze`
- Parsing: `BiSeNet`, `XSeg`
- Attributes: `AgeGender`, `FairFace`
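For example, to prefer CUDA and fall back to CPU for any of the classes above (the provider names are the standard onnxruntime identifiers):

```python
from uniface.landmark import PIPNet

landmarker = PIPNet(providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
```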

View File

@@ -43,7 +43,7 @@ class Face:
# Required (from detection)
bbox: np.ndarray # [x1, y1, x2, y2]
confidence: float # 0.0 to 1.0
landmarks: np.ndarray # (5, 2) or (106, 2)
landmarks: np.ndarray # (5, 2) from detectors. Dense landmarkers return (106, 2), (98, 2), or (68, 2).
# Optional (enriched by analyzers)
embedding: np.ndarray | None = None
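Since the dense landmarkers share the same pixel-space convention, a face's 5-point landmarks can be swapped for a dense set. A small sketch, assuming plain attribute assignment on the dataclass:

```python
import cv2

from uniface.detection import RetinaFace
from uniface.landmark import PIPNet

image = cv2.imread("photo.jpg")
landmarker = PIPNet()
for face in RetinaFace().detect(image):
    # Replace the detector's (5, 2) landmarks with the dense (98, 2) set
    face.landmarks = landmarker.get_landmarks(image, face.bbox)
```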

View File

@@ -194,6 +194,8 @@ If a model fails verification, it's re-downloaded automatically.
| Model | Size | Download |
|-------|------|----------|
| Landmark106 | 14 MB | ✅ |
| PIPNet WFLW-98 | 47 MB | ✅ |
| PIPNet 300W+CelebA-68 | 46 MB | ✅ |
| AgeGender | 8 MB | ✅ |
| FairFace | 44 MB | ✅ |
| Gaze ResNet34 | 82 MB | ✅ |

View File

@@ -117,7 +117,7 @@ uniface/
├── detection/ # Face detection (RetinaFace, SCRFD, YOLOv5Face, YOLOv8Face)
├── recognition/ # Face recognition (AdaFace, ArcFace, EdgeFace, MobileFace, SphereFace)
├── tracking/ # Multi-object tracking (BYTETracker)
├── landmark/ # 106-point landmarks
├── landmark/ # Dense landmarks (Landmark106 = 106 pts, PIPNet = 98 / 68 pts)
├── attribute/ # Age, gender, emotion, race
├── parsing/ # Face semantic segmentation
├── matting/ # Portrait matting (MODNet)

View File

@@ -6,16 +6,20 @@ Thank you for contributing to UniFace!
## Quick Start
We use [uv](https://docs.astral.sh/uv/) for reproducible dev installs (lockfile-pinned).
```bash
# Install uv first: https://docs.astral.sh/uv/getting-started/installation/
# Clone
git clone https://github.com/yakhyo/uniface.git
cd uniface
# Install dev dependencies
pip install -e ".[dev]"
# Install runtime + cpu + dev extras from uv.lock (--extra gpu for CUDA)
uv sync --extra cpu --extra dev
# Run tests
pytest
uv run pytest
```
---
@@ -39,10 +43,11 @@ ruff check . --fix
## Pre-commit Hooks
`pre-commit` is included in the `[dev]` extra, so `uv sync` already installs it.
```bash
pip install pre-commit
pre-commit install
pre-commit run --all-files
uv run pre-commit install
uv run pre-commit run --all-files
```
---
@@ -101,7 +106,7 @@ docs: Update installation instructions
## Releases
Releases are automated via GitHub Actions. Maintainers trigger **Actions → Release → Run workflow** with a [PEP 440](https://peps.python.org/pep-0440/) version (e.g. `0.7.0`, `0.7.0rc1`). The workflow bumps `pyproject.toml` + `uniface/__init__.py`, tags the commit, and publishes to PyPI. Docs redeploy only for stable releases.
Releases are automated via GitHub Actions. Maintainers trigger **Actions → Release Pipeline → Run workflow** with a [PEP 440](https://peps.python.org/pep-0440/) version (e.g. `0.7.0`, `0.7.0rc1`). The pipeline runs tests, bumps `pyproject.toml` + `uniface/__init__.py`, tags the commit, publishes to PyPI, and creates a GitHub Release. Docs redeploy only for stable releases.
See [CONTRIBUTING.md](https://github.com/yakhyo/uniface/blob/main/CONTRIBUTING.md#release-process) for the full process.

View File

@@ -12,6 +12,7 @@ Overview of all training datasets and evaluation benchmarks used by UniFace mode
| Recognition | [MS1MV2](#ms1mv2) | 5.8M images, 85.7K IDs | MobileFace, SphereFace |
| Recognition | [WebFace600K](#webface600k) | 600K images | ArcFace |
| Recognition | [WebFace4M / WebFace12M](#webface4m--webface12m) | 4M / 12M images | AdaFace |
| Landmarks | [WFLW](#wflw) / [300W+CelebA](#300w--celeba) | 10K / 3.8K labeled + 202.6K unlabeled | PIPNet (98 / 68 pts) |
| Gaze | [Gaze360](#gaze360) | 238 subjects | MobileGaze |
| Parsing | [CelebAMask-HQ](#celebamask-hq) | 30K images | BiSeNet |
| Attributes | [CelebA](#celeba) | 200K images | AgeGender |
@@ -126,6 +127,41 @@ Large-scale dataset with wide variations in pose, age, illumination, ethnicity,
---
### Facial Landmarks
#### WFLW
Wider Facial Landmarks in-the-Wild — a 98-point landmark dataset whose images come from
WIDER FACE. Used to train the supervised PIPNet 98-point variant shipped with UniFace.
| Property | Value |
| ---------- | -------------------------------------- |
| Images | 10,000 (7,500 train / 2,500 test) |
| Annotation | 98 manually labeled landmarks per face |
| Used by | PIPNet WFLW-98 |
!!! info "Reference"
**Project page**: [WFLW dataset](https://wywu.github.io/projects/LAB/WFLW.html)
---
#### 300W + CelebA
The 68-point PIPNet variant is trained in a generalizable semi-supervised setting (GSSL):
labeled images come from 300W and unlabeled images come from CelebA.
| Property | Value |
| --------------- | -------------------------------------------------------------------------------- |
| Labeled images | 3,837 (3,148 train: LFPW train + HELEN train + AFW; 689 test: LFPW test + HELEN test + iBUG) |
| Unlabeled | 202,599 (full CelebA; bounding boxes from RetinaFace per the PIPNet paper) |
| Annotation | 68-point iBUG layout |
| Used by | PIPNet 300W+CelebA-68 |
!!! info "Reference"
**Paper**: [PIPNet (Pixel-in-Pixel Net)](https://arxiv.org/abs/2003.03771) (IJCV 2021)
---
### Gaze Estimation
#### Gaze360

View File

@@ -41,7 +41,7 @@ AdaFace, ArcFace, EdgeFace, MobileFace, and SphereFace embeddings for identity v
<div class="feature-card" markdown>
### :material-map-marker: Landmarks
Accurate 106-point facial landmark localization for detailed face analysis.
Dense facial landmark localization — 106-point (2d106det) and 98 / 68-point (PIPNet) variants.
</div>
<div class="feature-card" markdown>

View File

@@ -23,3 +23,4 @@ UniFace is released under the [MIT License](https://opensource.org/licenses/MIT)
| MODNet | [yakhyo/modnet](https://github.com/yakhyo/modnet) | Apache-2.0 |
| MiniFASNet | [yakhyo/face-anti-spoofing](https://github.com/yakhyo/face-anti-spoofing) | Apache-2.0 |
| FairFace | [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx) | CC BY 4.0 |
| PIPNet | [yakhyo/pipnet-onnx](https://github.com/yakhyo/pipnet-onnx) — meanface tables vendored from [jhb86253817/PIPNet](https://github.com/jhb86253817/PIPNet) | MIT |

View File

@@ -196,6 +196,26 @@ Facial landmark localization model.
---
### PIPNet (98 / 68 points)
PIPNet (Pixel-in-Pixel Net) facial landmark detector. ResNet-18 backbone, 256×256 input.
| Model Name | Points | Backbone | Dataset | Size |
| ---------- | ------ | -------- | ------- | ---- |
| `WFLW_98` :material-check-circle: | 98 | ResNet-18 | WFLW (supervised) | 47 MB |
| `DW300_CELEBA_68` | 68 | ResNet-18 | 300W+CelebA (GSSL) | 46 MB |
!!! info "Reference"
**Paper**: [PIPNet: Towards Efficient Facial Landmark Detection in the Wild](https://arxiv.org/abs/2003.03771) (IJCV 2021)
**Source**: [yakhyo/pipnet-onnx](https://github.com/yakhyo/pipnet-onnx) — ONNX export from [jhb86253817/PIPNet](https://github.com/jhb86253817/PIPNet)
!!! note "Auto-selected meanface"
Both variants share the same architecture; the number of landmarks (and the matching
meanface table) is inferred from the ONNX output channel count.
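A hypothetical sketch of that auto-selection, reading the channel dimension of the first ONNX output (the exact mechanism in uniface may differ; the table names match those in `uniface/landmark/_meanface.py`):

```python
import onnxruntime as ort

session = ort.InferenceSession("pipnet_r18_wflw_98.onnx", providers=["CPUExecutionProvider"])
num_lms = session.get_outputs()[0].shape[1]  # output channels == landmark count
table = "MEANFACE_WFLW_98" if num_lms == 98 else "MEANFACE_300W_68"
```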
---
## Attribute Analysis Models
### Age & Gender Detection
@@ -453,6 +473,7 @@ See [Model Cache & Offline Use](concepts/model-cache-offline.md) for full detail
- **Portrait Matting**: [yakhyo/modnet](https://github.com/yakhyo/modnet) - MODNet ported weights and inference (from [ZHKKKe/MODNet](https://github.com/ZHKKKe/MODNet))
- **Face Anti-Spoofing**: [yakhyo/face-anti-spoofing](https://github.com/yakhyo/face-anti-spoofing) - MiniFASNet ONNX inference (weights from [minivision-ai/Silent-Face-Anti-Spoofing](https://github.com/minivision-ai/Silent-Face-Anti-Spoofing))
- **FairFace**: [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx) - FairFace ONNX inference for race, gender, age prediction
- **PIPNet**: [yakhyo/pipnet-onnx](https://github.com/yakhyo/pipnet-onnx) - PIPNet ONNX export and inference (from [jhb86253817/PIPNet](https://github.com/jhb86253817/PIPNet))
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights
### Papers
@@ -465,3 +486,4 @@ See [Model Cache & Offline Use](concepts/model-cache-offline.md) for full detail
- **SphereFace**: [Deep Hypersphere Embedding for Face Recognition](https://arxiv.org/abs/1704.08063)
- **MODNet**: [Real-Time Trimap-Free Portrait Matting via Objective Decomposition](https://arxiv.org/abs/2011.11961)
- **BiSeNet**: [Bilateral Segmentation Network for Real-time Semantic Segmentation](https://arxiv.org/abs/1808.00897)
- **PIPNet**: [Towards Efficient Facial Landmark Detection in the Wild](https://arxiv.org/abs/2003.03771)

View File

@@ -291,6 +291,6 @@ python tools/detect.py --source image.jpg
## See Also
- [Recognition Module](recognition.md) - Extract embeddings from detected faces
- [Landmarks Module](landmarks.md) - Get 106-point landmarks
- [Landmarks Module](landmarks.md) - Get 106 / 98 / 68-point dense landmarks
- [Image Pipeline Recipe](../recipes/image-pipeline.md) - Complete detection workflow
- [Concepts: Thresholds](../concepts/thresholds-calibration.md) - Tuning detection parameters

View File

@@ -14,6 +14,8 @@ Facial landmark detection provides precise localization of facial features.
| Model | Points | Size |
|-------|--------|------|
| **Landmark106** | 106 | 14 MB |
| **PIPNet (WFLW-98)** | 98 | 47 MB |
| **PIPNet (300W+CelebA-68)** | 68 | 46 MB |
!!! info "5-Point Landmarks"
Basic 5-point landmarks are included with all detection models (RetinaFace, SCRFD, YOLOv5-Face, YOLOv8-Face).
@@ -79,6 +81,48 @@ mouth = landmarks[87:106]
---
## PIPNet (98 / 68 points)
PIPNet (Pixel-in-Pixel Net) is a high-accuracy facial landmark detector. UniFace ships
two ONNX variants that share a ResNet-18 backbone and 256×256 input — the only difference
is the number of points and the dataset they were trained on.
### Basic Usage
```python
from uniface.detection import RetinaFace
from uniface.landmark import PIPNet
detector = RetinaFace()
landmarker = PIPNet() # Default: 98 points (WFLW)
faces = detector.detect(image)
if faces:
landmarks = landmarker.get_landmarks(image, faces[0].bbox)
print(f"Landmarks shape: {landmarks.shape}") # (98, 2)
```
### 68-Point Variant (300W+CelebA, GSSL)
```python
from uniface.constants import PIPNetWeights
from uniface.landmark import PIPNet
landmarker = PIPNet(model_name=PIPNetWeights.DW300_CELEBA_68)
landmarks = landmarker.get_landmarks(image, face.bbox)
print(landmarks.shape) # (68, 2)
```
### Notes
- The number of landmarks is read from the ONNX output and the matching meanface
table is selected automatically — there is no `num_lms=` argument.
- PIPNet uses an asymmetric crop around the bbox (+10% left / right / bottom,
10% top) and ImageNet normalization. This is handled internally (a rough sketch follows these notes).
- Output landmarks are in original-image pixel coordinates as `float32`.
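A rough sketch of that preprocessing, under stated assumptions: the crop arithmetic only approximates the note above, the top-edge handling and the colour-order conversion are assumptions, and the ImageNet mean/std are the standard values:

```python
import cv2
import numpy as np

def preprocess_sketch(image: np.ndarray, bbox) -> np.ndarray:
    x1, y1, x2, y2 = map(int, bbox)
    w, h = x2 - x1, y2 - y1
    # Expand the bbox roughly as described in the note (left/right/bottom by 10%);
    # the top edge is left untouched in this sketch
    x1, x2, y2 = max(x1 - int(0.1 * w), 0), x2 + int(0.1 * w), y2 + int(0.1 * h)
    crop = cv2.resize(image[max(y1, 0):y2, x1:x2], (256, 256))
    crop = crop[..., ::-1].astype(np.float32) / 255.0              # BGR -> RGB assumed
    crop = (crop - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]  # ImageNet norm
    return crop.transpose(2, 0, 1)[None].astype(np.float32)       # NCHW tensor
```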
---
## 5-Point Landmarks (Detection)
All detection models provide 5-point landmarks:
@@ -242,9 +286,17 @@ def estimate_head_pose(landmarks, image_shape):
## Factory Function
```python
from uniface.constants import PIPNetWeights
from uniface.landmark import create_landmarker
landmarker = create_landmarker() # Returns Landmark106
# Default: 106-point InsightFace model
landmarker = create_landmarker()
# 98-point PIPNet (WFLW)
landmarker = create_landmarker('pipnet')
# 68-point PIPNet (300W+CelebA)
landmarker = create_landmarker('pipnet', model_name=PIPNetWeights.DW300_CELEBA_68)
```
---

View File

@@ -166,7 +166,9 @@ Face 2: Female, 20-29, White
---
## Facial Landmarks (106 Points)
## Facial Landmarks (106 / 98 / 68 Points)
UniFace ships two dense-landmark families. Pick whichever fits your downstream task:
```python
import cv2
@@ -174,14 +176,14 @@ from uniface.detection import RetinaFace
from uniface.landmark import Landmark106
detector = RetinaFace()
landmarker = Landmark106()
landmarker = Landmark106() # 106-point InsightFace 2d106det model
image = cv2.imread("photo.jpg")
faces = detector.detect(image)
if faces:
landmarks = landmarker.get_landmarks(image, faces[0].bbox)
print(f"Detected {len(landmarks)} landmarks")
print(f"Detected {len(landmarks)} landmarks") # 106
# Draw landmarks
for x, y in landmarks.astype(int):
@@ -190,6 +192,21 @@ if faces:
cv2.imwrite("landmarks.jpg", image)
```
**PIPNet (98 / 68 points)** — ResNet-18 backbone trained on WFLW (98 pts) or 300W+CelebA (68 pts):
```python
from uniface.constants import PIPNetWeights
from uniface.landmark import PIPNet
# 98-point WFLW model (default)
landmarker_98 = PIPNet()
# 68-point 300W+CelebA model
landmarker_68 = PIPNet(model_name=PIPNetWeights.DW300_CELEBA_68)
landmarks = landmarker_98.get_landmarks(image, faces[0].bbox) # (98, 2)
```
---
## Gaze Estimation
@@ -465,7 +482,8 @@ For detailed model comparisons and benchmarks, see the [Model Zoo](models.md).
| Task | Available Models |
|------|------------------|
| Detection | `RetinaFace`, `SCRFD`, `YOLOv5Face`, `YOLOv8Face` |
| Recognition | `ArcFace`, `AdaFace`, `MobileFace`, `SphereFace` |
| Recognition | `ArcFace`, `AdaFace`, `EdgeFace`, `MobileFace`, `SphereFace` |
| Landmarks | `Landmark106` (106 pts), `PIPNet` (98 / 68 pts) |
| Tracking | `BYTETracker` |
| Gaze | `MobileGaze` (ResNet18/34/50, MobileNetV2, MobileOneS0) |
| Head Pose | `HeadPose` (ResNet18/34/50, MobileNetV2/V3) |
@@ -513,7 +531,7 @@ python -c "import platform; print(platform.machine())"
from uniface.detection import RetinaFace, SCRFD
from uniface.recognition import ArcFace, AdaFace
from uniface.attribute import AgeGender, FairFace
from uniface.landmark import Landmark106
from uniface.landmark import Landmark106, PIPNet
from uniface.gaze import MobileGaze
from uniface.headpose import HeadPose
from uniface.parsing import BiSeNet, XSeg

View File

@@ -1,6 +1,6 @@
[project]
name = "uniface"
version = "3.5.2"
version = "3.6.0"
description = "UniFace: A Unified Face Analysis Library for Python"
readme = "README.md"
license = "MIT"
@@ -51,14 +51,20 @@ dependencies = [
]
[project.optional-dependencies]
cpu = ["onnxruntime>=1.16.0"]
gpu = ["onnxruntime-gpu>=1.16.0"]
cpu = [
"onnxruntime>=1.16.0; python_version >= '3.11'",
"onnxruntime>=1.16.0,<1.24; python_version < '3.11'",
]
gpu = [
"onnxruntime-gpu>=1.16.0; python_version >= '3.11'",
"onnxruntime-gpu>=1.16.0,<1.24; python_version < '3.11'",
]
dev = ["pytest>=7.0.0", "ruff>=0.4.0", "pre-commit>=3.0.0"]
docs = [
"mkdocs-material>=9.0",
"pymdown-extensions>=10.0",
"mkdocs-git-committers-plugin-2>=1.0",
"mkdocs-git-revision-date-localized-plugin>=2.0",
"mkdocs-material",
"pymdown-extensions",
"mkdocs-git-committers-plugin-2",
"mkdocs-git-revision-date-localized-plugin",
]
[project.urls]

View File

@@ -1,9 +0,0 @@
numpy>=1.21.0
opencv-python>=4.5.0
scikit-image>=0.22.0
scipy>=1.7.0
requests>=2.28.0
tqdm>=4.64.0
# Install ONE of the following (not both):
# onnxruntime>=1.16.0 # CPU / Apple Silicon → pip install uniface[cpu]
# onnxruntime-gpu>=1.16.0 # NVIDIA CUDA → pip install uniface[gpu]

View File

@@ -1,61 +0,0 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from __future__ import annotations
import numpy as np
from uniface.draw import draw_gaze
def _compute_gaze_delta(bbox: np.ndarray, pitch: float, yaw: float) -> tuple[int, int]:
"""Replicate draw_gaze dx/dy math for verification."""
x_min, _, x_max, _ = map(int, bbox[:4])
length = x_max - x_min
dx = int(-length * np.sin(yaw) * np.cos(pitch))
dy = int(-length * np.sin(pitch))
return dx, dy
def test_draw_gaze_yaw_only_moves_horizontally():
"""Yaw-only input (pitch=0) should produce horizontal displacement only."""
image = np.zeros((200, 200, 3), dtype=np.uint8)
bbox = np.array([50, 50, 150, 150], dtype=np.float32)
yaw = 0.5
pitch = 0.0
dx, dy = _compute_gaze_delta(bbox, pitch, yaw)
assert dx != 0, 'Yaw-only should produce horizontal displacement'
assert dy == 0, 'Yaw-only should produce zero vertical displacement'
# Should not raise
draw_gaze(image, bbox, pitch, yaw, draw_bbox=False, draw_angles=False)
def test_draw_gaze_pitch_only_moves_vertically():
"""Pitch-only input (yaw=0) should produce vertical displacement only."""
image = np.zeros((200, 200, 3), dtype=np.uint8)
bbox = np.array([50, 50, 150, 150], dtype=np.float32)
yaw = 0.0
pitch = 0.5
dx, dy = _compute_gaze_delta(bbox, pitch, yaw)
assert dx == 0, 'Pitch-only should produce zero horizontal displacement'
assert dy != 0, 'Pitch-only should produce vertical displacement'
# Should not raise
draw_gaze(image, bbox, pitch, yaw, draw_bbox=False, draw_angles=False)
def test_draw_gaze_modifies_image():
"""draw_gaze should modify the image in place."""
image = np.zeros((200, 200, 3), dtype=np.uint8)
bbox = np.array([50, 50, 150, 150], dtype=np.float32)
original = image.copy()
draw_gaze(image, bbox, 0.3, 0.3)
assert not np.array_equal(image, original), 'draw_gaze should modify the image'

View File

@@ -130,6 +130,25 @@ def test_create_landmarker_invalid_method():
create_landmarker('invalid_method')
def test_create_landmarker_pipnet_default():
"""create_landmarker('pipnet') returns a PIPNet (98 points by default)."""
from uniface.landmark import PIPNet
landmarker = create_landmarker('pipnet')
assert isinstance(landmarker, PIPNet), 'Should return PIPNet instance'
assert landmarker.num_lms == 98
def test_create_landmarker_pipnet_68():
"""create_landmarker('pipnet', model_name=...) selects the 68-point variant."""
from uniface.constants import PIPNetWeights
from uniface.landmark import PIPNet
landmarker = create_landmarker('pipnet', model_name=PIPNetWeights.DW300_CELEBA_68)
assert isinstance(landmarker, PIPNet), 'Should return PIPNet instance'
assert landmarker.num_lms == 68
# list_available_detectors tests
def test_list_available_detectors():
"""
@@ -189,6 +208,17 @@ def test_landmarker_inference_from_factory():
assert landmarks.shape == (106, 2), 'Should return 106 landmarks'
def test_pipnet_landmarker_inference_from_factory():
"""PIPNet landmarker created from factory can perform inference."""
landmarker = create_landmarker('pipnet')
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
mock_bbox = [100, 100, 300, 300]
landmarks = landmarker.get_landmarks(mock_image, mock_bbox)
assert landmarks is not None, 'Landmarker should return landmarks'
assert landmarks.shape == (98, 2), 'Should return 98 landmarks'
def test_multiple_detector_creation():
"""
Test that multiple detectors can be created independently.

View File

@@ -0,0 +1,132 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from __future__ import annotations
import numpy as np
import pytest
from uniface.constants import PIPNetWeights
from uniface.landmark import PIPNet
@pytest.fixture(scope='module', params=[PIPNetWeights.WFLW_98, PIPNetWeights.DW300_CELEBA_68])
def pipnet_model(request):
return PIPNet(model_name=request.param)
@pytest.fixture
def mock_image():
return np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
@pytest.fixture
def mock_bbox():
return [100, 100, 300, 300]
def _expected_n_lms(model: PIPNet) -> int:
return 98 if model.num_lms == 98 else 68
def test_model_initialization(pipnet_model):
assert pipnet_model is not None, 'PIPNet model initialization failed.'
assert pipnet_model.num_lms in (68, 98), f'Unexpected num_lms: {pipnet_model.num_lms}'
assert pipnet_model.input_h == pipnet_model.input_w == 256
def test_landmark_detection(pipnet_model, mock_image, mock_bbox):
landmarks = pipnet_model.get_landmarks(mock_image, mock_bbox)
n = _expected_n_lms(pipnet_model)
assert landmarks.shape == (n, 2), f'Expected shape ({n}, 2), got {landmarks.shape}'
def test_landmark_dtype(pipnet_model, mock_image, mock_bbox):
landmarks = pipnet_model.get_landmarks(mock_image, mock_bbox)
assert landmarks.dtype == np.float32, f'Expected float32, got {landmarks.dtype}'
def test_landmark_coordinates_within_image(pipnet_model, mock_image, mock_bbox):
landmarks = pipnet_model.get_landmarks(mock_image, mock_bbox)
n = _expected_n_lms(pipnet_model)
x_coords = landmarks[:, 0]
y_coords = landmarks[:, 1]
x1, y1, x2, y2 = mock_bbox
margin = 50
x_in_bounds = int(np.sum((x_coords >= x1 - margin) & (x_coords <= x2 + margin)))
y_in_bounds = int(np.sum((y_coords >= y1 - margin) & (y_coords <= y2 + margin)))
threshold = max(int(0.9 * n), n - 5)
assert x_in_bounds >= threshold, f'Only {x_in_bounds}/{n} x-coordinates within bounds'
assert y_in_bounds >= threshold, f'Only {y_in_bounds}/{n} y-coordinates within bounds'
def test_different_bbox_sizes(pipnet_model, mock_image):
n = _expected_n_lms(pipnet_model)
test_bboxes = [
[50, 50, 150, 150],
[100, 100, 300, 300],
[50, 50, 400, 400],
]
for bbox in test_bboxes:
landmarks = pipnet_model.get_landmarks(mock_image, bbox)
assert landmarks.shape == (n, 2), f'Failed for bbox {bbox}'
def test_consistency(pipnet_model, mock_image, mock_bbox):
landmarks1 = pipnet_model.get_landmarks(mock_image, mock_bbox)
landmarks2 = pipnet_model.get_landmarks(mock_image, mock_bbox)
assert np.allclose(landmarks1, landmarks2), 'Same input should produce same landmarks'
def test_different_image_sizes(pipnet_model, mock_bbox):
n = _expected_n_lms(pipnet_model)
test_sizes = [(480, 640, 3), (720, 1280, 3), (1080, 1920, 3)]
for size in test_sizes:
mock_image = np.random.randint(0, 255, size, dtype=np.uint8)
landmarks = pipnet_model.get_landmarks(mock_image, mock_bbox)
assert landmarks.shape == (n, 2), f'Failed for image size {size}'
def test_bbox_list_format(pipnet_model, mock_image):
n = _expected_n_lms(pipnet_model)
landmarks = pipnet_model.get_landmarks(mock_image, [100, 100, 300, 300])
assert landmarks.shape == (n, 2), 'Should work with bbox as list'
def test_bbox_array_format(pipnet_model, mock_image):
n = _expected_n_lms(pipnet_model)
bbox_array = np.array([100, 100, 300, 300])
landmarks = pipnet_model.get_landmarks(mock_image, bbox_array)
assert landmarks.shape == (n, 2), 'Should work with bbox as numpy array'
def test_landmark_distribution(pipnet_model, mock_image, mock_bbox):
landmarks = pipnet_model.get_landmarks(mock_image, mock_bbox)
x_variance = np.var(landmarks[:, 0])
y_variance = np.var(landmarks[:, 1])
assert x_variance > 0, 'Landmarks should have variation in x-coordinates'
assert y_variance > 0, 'Landmarks should have variation in y-coordinates'
def test_default_model_is_wflw_98():
"""PIPNet() with no args should default to the 98-point WFLW model."""
model = PIPNet()
assert model.num_lms == 98
def test_meanface_lookup_invalid_num_lms():
"""get_meanface_info should reject unsupported landmark counts."""
from uniface.landmark._meanface import get_meanface_info
with pytest.raises(ValueError, match='No meanface table'):
get_meanface_info(num_lms=42)

View File

@@ -198,7 +198,10 @@ def main():
parser_arg.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
parser_arg.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
parser_arg.add_argument(
'--model', type=str, default=ParsingWeights.RESNET18, choices=[ParsingWeights.RESNET18, ParsingWeights.RESNET34]
'--model',
type=ParsingWeights,
default=ParsingWeights.RESNET18,
choices=list(ParsingWeights),
)
parser_arg.add_argument(
'--expand-ratio',
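Like the other weight enums shown in this changeset, `ParsingWeights` is presumably a `str`-based `Enum`, so argparse can use the class itself as the `type` converter: the raw CLI string goes through the enum constructor and the parsed value is already a member. A self-contained sketch with a stand-in enum (the real `ParsingWeights` values may differ):

```python
import argparse
from enum import Enum

class DemoWeights(str, Enum):  # stand-in for uniface.constants.ParsingWeights
    RESNET18 = 'resnet18'
    RESNET34 = 'resnet34'

parser = argparse.ArgumentParser()
# argparse calls DemoWeights('resnet34') on the raw string, and the
# choices list renders every variant in --help
parser.add_argument('--model', type=DemoWeights, default=DemoWeights.RESNET18, choices=list(DemoWeights))
args = parser.parse_args(['--model', 'resnet34'])
assert args.model is DemoWeights.RESNET34
```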

View File

@@ -17,7 +17,7 @@ This library provides unified APIs for:
- Face detection (RetinaFace, SCRFD, YOLOv5Face, YOLOv8Face)
- Face recognition (AdaFace, ArcFace, EdgeFace, MobileFace, SphereFace)
- Face tracking (ByteTrack with Kalman filtering)
- Facial landmarks (106-point detection)
- Facial landmarks (106 / 98 / 68-point detection: 2d106det, PIPNet)
- Face parsing (semantic segmentation)
- Portrait matting (trimap-free alpha matte)
- Gaze estimation
@@ -31,7 +31,7 @@ from __future__ import annotations
__license__ = 'MIT'
__author__ = 'Yakhyokhuja Valikhujaev'
__version__ = '3.5.2'
__version__ = '3.6.0'
import contextlib
@@ -51,7 +51,7 @@ from .detection import (
)
from .gaze import MobileGaze, create_gaze_estimator
from .headpose import HeadPose, create_head_pose_estimator
from .landmark import Landmark106, create_landmarker
from .landmark import Landmark106, PIPNet, create_landmarker
from .matting import MODNet, create_matting_model
from .parsing import BiSeNet, XSeg, create_face_parser
from .privacy import BlurFace
@@ -95,6 +95,7 @@ __all__ = [
'SphereFace',
# Landmark models
'Landmark106',
'PIPNet',
# Gaze models
'GazeResult',
'MobileGaze',

View File

@@ -113,9 +113,10 @@ class FaceAnalyzer:
return faces
def __repr__(self) -> str:
parts = [f'FaceAnalyzer(detector={self.detector.__class__.__name__}']
parts = [f'detector={self.detector.__class__.__name__}']
if self.recognizer:
parts.append(f'recognizer={self.recognizer.__class__.__name__}')
for attr in self.attributes:
parts.append(f'{attr.__class__.__name__}')
return ', '.join(parts) + ')'
if self.attributes:
attr_names = ', '.join(attr.__class__.__name__ for attr in self.attributes)
parts.append(f'attributes=[{attr_names}]')
return f'FaceAnalyzer({", ".join(parts)})'

View File

@@ -7,6 +7,7 @@ import cv2
import numpy as np
from uniface.attribute.base import Attribute
from uniface.common import softmax
from uniface.constants import FairFaceWeights
from uniface.log import Logger
from uniface.model_store import verify_model_weights
@@ -150,9 +151,9 @@ class FairFace(Attribute):
race_logits, gender_logits, age_logits = prediction
# Apply softmax
race_probs = self._softmax(race_logits[0])
gender_probs = self._softmax(gender_logits[0])
age_probs = self._softmax(age_logits[0])
race_probs = softmax(race_logits[0])
gender_probs = softmax(gender_logits[0])
age_probs = softmax(age_logits[0])
# Get predictions
race_idx = int(np.argmax(race_probs))
@@ -186,9 +187,3 @@ class FairFace(Attribute):
face.age_group = result.age_group
face.race = result.race
return result
@staticmethod
def _softmax(x: np.ndarray) -> np.ndarray:
"""Compute softmax values for numerical stability."""
exp_x = np.exp(x - np.max(x))
return exp_x / np.sum(exp_x)

View File

@@ -19,6 +19,7 @@ __all__ = [
'letterbox_resize',
'non_max_suppression',
'resize_image',
'softmax',
'xyxy_to_cxcywh',
]
@@ -63,6 +64,21 @@ def resize_image(
return image, resize_factor
def softmax(x: np.ndarray, axis: int = -1) -> np.ndarray:
"""Compute the numerically stable softmax of an array along ``axis``.
Args:
x: Input array.
axis: Axis along which softmax is computed. Defaults to the last axis.
Returns:
Array of the same shape as *x* with values in ``[0, 1]`` summing to 1
along *axis*.
"""
exp_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
return exp_x / np.sum(exp_x, axis=axis, keepdims=True)
def xyxy_to_cxcywh(bboxes: np.ndarray) -> np.ndarray:
"""Convert bounding boxes from ``[x1, y1, x2, y2]`` to ``[cx, cy, w, h]``.

View File

@@ -155,6 +155,16 @@ class LandmarkWeights(str, Enum):
DEFAULT = "2d_106"
class PIPNetWeights(str, Enum):
"""
PIPNet: Pixel-in-Pixel Net for facial landmark detection.
ResNet-18 backbone, 256x256 input.
https://github.com/yakhyo/pipnet-onnx
"""
WFLW_98 = "pipnet_r18_wflw_98"
DW300_CELEBA_68 = "pipnet_r18_300w_celeba_68"
class GazeWeights(str, Enum):
"""
MobileGaze: Real-Time Gaze Estimation models.
@@ -379,6 +389,16 @@ MODEL_REGISTRY: dict[Enum, ModelInfo] = {
sha256='f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf'
),
# PIPNet (98 / 68 point landmarks)
PIPNetWeights.WFLW_98: ModelInfo(
url='https://github.com/yakhyo/pipnet-onnx/releases/download/weights/pipnet_r18_wflw_98.onnx',
sha256='9862838dc6144bc772b6485f6f6d31295c0b1c1ab7293e6ddeb0a439cb10218d'
),
PIPNetWeights.DW300_CELEBA_68: ModelInfo(
url='https://github.com/yakhyo/pipnet-onnx/releases/download/weights/pipnet_r18_300w_celeba_68.onnx',
sha256='63fa56fd4b8f6ccc4b88f2b36e00fa3d8c21a2c4244ab9381e8b432cef35197b'
),
# Gaze (MobileGaze)
GazeWeights.RESNET18: ModelInfo(
url='https://github.com/yakhyo/gaze-estimation/releases/download/weights/resnet18_gaze.onnx',
@@ -469,4 +489,5 @@ MODEL_REGISTRY: dict[Enum, ModelInfo] = {
MODEL_URLS: dict[Enum, str] = {k: v.url for k, v in MODEL_REGISTRY.items()}
MODEL_SHA256: dict[Enum, str] = {k: v.sha256 for k, v in MODEL_REGISTRY.items()}
CHUNK_SIZE = 8192
DOWNLOAD_CHUNK_SIZE = 256 * 1024 # 256 KiB
HASH_CHUNK_SIZE = 1024 * 1024 # 1 MiB
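For context, a sketch of how a verifier might consume `HASH_CHUNK_SIZE` (the helper name is hypothetical; uniface's actual verification logic lives in `model_store`):

```python
import hashlib

HASH_CHUNK_SIZE = 1024 * 1024  # 1 MiB, matching the constant above

def sha256_of(path: str) -> str:
    # Hash large .onnx files in 1 MiB chunks instead of reading them whole
    digest = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(HASH_CHUNK_SIZE), b''):
            digest.update(chunk)
    return digest.hexdigest()
```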

View File

@@ -202,7 +202,7 @@ class RetinaFace(BaseDetector):
height, width, _ = image.shape
image_tensor = self.preprocess(image)
# ONNXRuntime inference
# Inference
outputs = self.inference(image_tensor)
# Postprocessing

View File

@@ -247,9 +247,10 @@ class SCRFD(BaseDetector):
image_tensor = self.preprocess(image)
# ONNXRuntime inference
# Inference
outputs = self.inference(image_tensor)
# Postprocessing
scores_list, bboxes_list, kpss_list = self.postprocess(outputs, image_size=image.shape[:2])
# Handle case when no faces are detected

View File

@@ -2,18 +2,11 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""
YOLOv8-Face detector implementation.
Uses anchor-free design with DFL (Distribution Focal Loss) for bbox regression.
Reference: https://github.com/yakhyo/yolov8-face-onnx-inference
"""
from typing import Any, Literal
import numpy as np
from uniface.common import letterbox_resize, non_max_suppression
from uniface.common import letterbox_resize, non_max_suppression, softmax
from uniface.constants import YOLOv8FaceWeights
from uniface.log import Logger
from uniface.model_store import verify_model_weights
@@ -171,12 +164,6 @@ class YOLOv8Face(BaseDetector):
"""
return self.session.run(self.output_names, {self.input_names: input_tensor})
@staticmethod
def _softmax(x: np.ndarray, axis: int = -1) -> np.ndarray:
"""Compute softmax values for array x along specified axis."""
exp_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
return exp_x / np.sum(exp_x, axis=axis, keepdims=True)
def postprocess(
self,
predictions: list[np.ndarray],
@@ -224,7 +211,7 @@ class YOLOv8Face(BaseDetector):
# Decode bounding boxes from DFL
bbox_pred = bbox_pred.reshape(-1, 4, 16)
bbox_dist = self._softmax(bbox_pred, axis=-1) @ np.arange(16)
bbox_dist = softmax(bbox_pred, axis=-1) @ np.arange(16)
# Convert distances to xyxy format
x1 = (grid_x - bbox_dist[:, 0]) * stride
@@ -279,16 +266,14 @@ class YOLOv8Face(BaseDetector):
if len(keep) == 0:
return np.array([]), np.array([])
# Limit to max_det
# Filter detections and limit to max_det
keep = keep[: self.max_det]
boxes = boxes[keep]
scores = scores[keep]
landmarks = landmarks[keep]
# === SCALE TO ORIGINAL IMAGE COORDINATES ===
# Scale back to original image coordinates
pad_w, pad_h = padding
# Scale boxes back to original image coordinates
boxes[:, [0, 2]] = (boxes[:, [0, 2]] - pad_w) / scale
boxes[:, [1, 3]] = (boxes[:, [1, 3]] - pad_h) / scale
@@ -303,7 +288,7 @@ class YOLOv8Face(BaseDetector):
# Reshape landmarks to (N, 5, 2)
landmarks = landmarks.reshape(-1, 5, 2)
# Combine box and score
# Combine results
detections = np.concatenate([boxes, scores[:, None]], axis=1)
return detections, landmarks
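The DFL decode above turns each side's 16 distance logits into one distance via a probability-weighted expectation (a soft-argmax); the result is then scaled by the feature stride. A tiny numeric sketch using the shared `softmax` helper:

```python
import numpy as np

from uniface.common import softmax

# One DFL distribution over 16 distance bins: the decoded distance is the
# probability-weighted mean bin index, later multiplied by the stride.
logits = np.zeros(16)
logits[4] = logits[5] = 10.0          # mass concentrated on bins 4 and 5
distance = softmax(logits, axis=-1) @ np.arange(16)
print(round(float(distance), 2))      # 4.5: halfway between bins 4 and 5
```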

View File

@@ -677,14 +677,10 @@ def vis_parsing_maps(
segmentation_mask = segmentation_mask.copy().astype(np.uint8)
# Create a color mask
segmentation_mask_color = np.zeros((segmentation_mask.shape[0], segmentation_mask.shape[1], 3))
num_classes = np.max(segmentation_mask)
for class_index in range(1, num_classes + 1):
class_pixels = np.where(segmentation_mask == class_index)
segmentation_mask_color[class_pixels[0], class_pixels[1], :] = FACE_PARSING_COLORS[class_index]
segmentation_mask_color = segmentation_mask_color.astype(np.uint8)
max_class = int(segmentation_mask.max())
palette = np.zeros((max(max_class + 1, len(FACE_PARSING_COLORS)), 3), dtype=np.uint8)
palette[: len(FACE_PARSING_COLORS)] = FACE_PARSING_COLORS
segmentation_mask_color = palette[segmentation_mask]
# Convert image to BGR format for blending
bgr_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
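The rewrite above replaces the per-class Python loop with a palette lookup table: NumPy fancy indexing maps every pixel's class id to its colour in one vectorized step. A tiny sketch of the equivalence:

```python
import numpy as np

palette = np.array([[0, 0, 0], [255, 0, 0], [0, 255, 0]], dtype=np.uint8)  # 3 demo classes
mask = np.array([[0, 1], [2, 1]])  # per-pixel class ids
color = palette[mask]              # shape (2, 2, 3): one lookup, no loop
assert color[0, 1].tolist() == [255, 0, 0]
```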

View File

@@ -6,6 +6,7 @@
import cv2
import numpy as np
from uniface.common import softmax
from uniface.constants import GazeWeights
from uniface.log import Logger
from uniface.model_store import verify_model_weights
@@ -142,11 +143,6 @@ class MobileGaze(BaseGazeEstimator):
return image
def _softmax(self, x: np.ndarray) -> np.ndarray:
"""Apply softmax along axis 1."""
e_x = np.exp(x - np.max(x, axis=1, keepdims=True))
return e_x / e_x.sum(axis=1, keepdims=True)
def postprocess(self, outputs: tuple[np.ndarray, np.ndarray]) -> GazeResult:
"""
Postprocess raw model outputs into gaze angles.
@@ -164,8 +160,8 @@ class MobileGaze(BaseGazeEstimator):
yaw_logits, pitch_logits = outputs
# Convert logits to probabilities
yaw_probs = self._softmax(yaw_logits)
pitch_probs = self._softmax(pitch_logits)
yaw_probs = softmax(yaw_logits)
pitch_probs = softmax(pitch_logits)
# Compute expected bin index (soft-argmax)
yaw_deg = np.sum(yaw_probs * self._idx_tensor, axis=1) * self._binwidth - self._angle_offset
@@ -183,6 +179,13 @@ class MobileGaze(BaseGazeEstimator):
This method orchestrates the full pipeline: preprocessing the input,
running inference, and postprocessing to return the gaze direction.
Args:
face_image (np.ndarray): A cropped face image in BGR format with shape (H, W, 3).
Returns:
GazeResult: Estimated gaze direction containing ``pitch`` (vertical) and
``yaw`` (horizontal) angles in radians.
"""
input_tensor = self.preprocess(face_image)
outputs = self.session.run(self.output_names, {self.input_name: input_tensor})
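The decode above is a soft-argmax over angle bins: softmax probabilities weight the bin indices, then the bin width and offset map the expectation to degrees (the public result is in radians, per the docstring). A numeric sketch with illustrative constants, not necessarily MobileGaze's:

```python
import numpy as np

from uniface.common import softmax

binwidth, offset = 4.0, 180.0   # illustrative: 90 bins of 4 degrees, centered on 0
idx_tensor = np.arange(90)
logits = np.zeros((1, 90))
logits[0, 50] = 10.0            # sharp peak at bin 50
probs = softmax(logits, axis=1)
deg = np.sum(probs * idx_tensor, axis=1) * binwidth - offset
print(deg.round(1))             # [19.9] -> about 20 degrees
```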

View File

@@ -170,6 +170,13 @@ class HeadPose(BaseHeadPoseEstimator):
This method orchestrates the full pipeline: preprocessing the input,
running inference, and postprocessing to return the head orientation.
Args:
face_image (np.ndarray): A cropped face image in BGR format with shape (H, W, 3).
Returns:
HeadPoseResult: Estimated head orientation containing ``pitch`` (vertical),
``yaw`` (horizontal), and ``roll`` (in-plane) angles in degrees.
"""
input_tensor = self.preprocess(face_image)
outputs = self.session.run(self.output_names, {self.input_name: input_tensor})

View File

@@ -4,26 +4,35 @@
from .base import BaseLandmarker
from .models import Landmark106
from .pipnet import PIPNet
def create_landmarker(method: str = '2d106det', **kwargs) -> BaseLandmarker:
"""
Factory function to create facial landmark predictors.
"""Factory function to create facial landmark predictors.
Args:
method (str): Landmark prediction method.
Options: '2d106det' (default), 'landmark106', '106'.
**kwargs: Model-specific parameters.
Options:
- ``'2d106det'`` (default): InsightFace 2d106det 106-point model.
- ``'pipnet'``: PIPNet 98-point (WFLW) or 68-point (300W+CelebA)
model. Pass ``model_name=PIPNetWeights.DW300_CELEBA_68`` for
the 68-point variant.
**kwargs: Model-specific parameters forwarded to the underlying class.
Returns:
Initialized landmarker instance.
Raises:
ValueError: If ``method`` is not supported.
"""
method = method.lower()
if method in ('2d106det', 'landmark106', '106'):
if method == '2d106det':
return Landmark106(**kwargs)
else:
available = ['2d106det', 'landmark106', '106']
raise ValueError(f"Unsupported method: '{method}'. Available: {available}")
if method == 'pipnet':
return PIPNet(**kwargs)
available = ['2d106det', 'pipnet']
raise ValueError(f"Unsupported method: '{method}'. Available: {available}")
__all__ = ['BaseLandmarker', 'Landmark106', 'create_landmarker']
__all__ = ['BaseLandmarker', 'Landmark106', 'PIPNet', 'create_landmarker']

View File

@@ -0,0 +1,161 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
#
# Mean-face arrays vendored from upstream PIPNet (MIT):
# https://github.com/jhb86253817/PIPNet/tree/master/data
from __future__ import annotations
import numpy as np
# fmt: off
# 300W layout: 68 landmarks, 136 floats.
MEANFACE_300W_68: tuple[float, ...] = (
0.05558998895410058, 0.23848280098218655, 0.05894856684324656, 0.3590187767402909,
0.0736574254414371, 0.4792196439871159, 0.09980016420365162, 0.5959029676167197,
0.14678670154995865, 0.7035615597409001, 0.21847188218752928, 0.7971705893013413,
0.30554692814599393, 0.8750572978073209, 0.4018434142644611, 0.9365018059444535,
0.5100536090382116, 0.9521295666029498, 0.6162039414413925, 0.9309467340899419,
0.7094522484942942, 0.8669275031738761, 0.7940993502957612, 0.7879369615524398,
0.8627063649669019, 0.6933756633633967, 0.9072386130534111, 0.5836975017700834,
0.9298874997796132, 0.4657004930314701, 0.9405202670724796, 0.346063993805527,
0.9425419553088846, 0.22558131891345742, 0.13304298285530403, 0.14853071838028062,
0.18873587368440375, 0.09596491613770254, 0.2673231915839219, 0.08084218279128136,
0.34878638553224905, 0.09253591849498964, 0.4226713753717798, 0.12466063383809506,
0.5618513152452376, 0.11839668911898667, 0.6394952560845826, 0.08480191391770678,
0.7204375851516752, 0.07249669092117161, 0.7988615904537885, 0.08766933146893043,
0.8534884939460948, 0.1380096813348583, 0.49610677423740546, 0.21516740699375395,
0.49709661403980665, 0.2928875699060973, 0.4982292618461611, 0.3699985379939941,
0.49982965173254235, 0.4494119144493957, 0.406772397599095, 0.5032397294041786,
0.45231994786363067, 0.5197953144002292, 0.49969685987914064, 0.5332489262413073,
0.5470074224053442, 0.518413595827126, 0.5892261151542287, 0.5023530079850803,
0.22414578747180394, 0.22835847349949062, 0.27262947128194215, 0.19915251892241678,
0.3306759252861797, 0.20026034220607236, 0.38044435864341913, 0.23839196034290633,
0.32884072789429913, 0.24902443794896897, 0.2707409300714473, 0.24950886025380967,
0.6086826011068529, 0.23465048639345917, 0.660397116846103, 0.1937087938594717,
0.7177815187666494, 0.19317079039835858, 0.7652328176062365, 0.22088822845258235,
0.722727677909097, 0.24195514178450958, 0.6658378927310327, 0.2441554205021945,
0.32894370935769124, 0.6496589505331646, 0.39347179739100613, 0.6216899667490776,
0.4571976492475472, 0.60794251109236, 0.4990484623797022, 0.6190124015360254,
0.5465555522325872, 0.6071477960565326, 0.6116127327356168, 0.6205387097430033,
0.6742318496058836, 0.6437466364395467, 0.6144773141699744, 0.7077526646009754,
0.5526442055374252, 0.7363350735898412, 0.5018120662554302, 0.7424476622366345,
0.4554458875556401, 0.7382303858617719, 0.3923750731597415, 0.7118887028663435,
0.35530766372404593, 0.6524479416354049, 0.457111071610868, 0.6467108367268608,
0.49974082228815025, 0.6508406774477011, 0.5477027224368399, 0.6451242819422733,
0.6478392760505715, 0.647852382880368, 0.5488474760115958, 0.6779061893042735,
0.5001073351044452, 0.6845280260362221, 0.4564831746654594, 0.6799300301441035,
)
# WFLW layout: 98 landmarks, 196 floats.
MEANFACE_WFLW_98: tuple[float, ...] = (
0.07960419395480703, 0.3921576875344978, 0.08315055593117261, 0.43509551571809146,
0.08675705281580391, 0.47810288286566444, 0.09141892980469117, 0.5210356946467262,
0.09839925903528965, 0.5637522280060038, 0.10871037524559955, 0.6060410614977951,
0.12314562992759207, 0.6475338700558225, 0.14242389255404694, 0.6877152027028081,
0.16706295456951875, 0.7259564546408682, 0.19693946055282413, 0.761730578566735,
0.23131827931527224, 0.7948205670466106, 0.2691730934906831, 0.825332081636482,
0.3099415030959131, 0.853325959406618, 0.3535202097901413, 0.8782538906229107,
0.40089023799272033, 0.8984102434399625, 0.4529251732310723, 0.9112191359814178,
0.5078640056794708, 0.9146712690731943, 0.5616519666079889, 0.9094327772020283,
0.6119216923689698, 0.8950540037623425, 0.6574617882337107, 0.8738084866764846,
0.6994820494908942, 0.8482660530943744, 0.7388135339780575, 0.8198750461527688,
0.775158750479601, 0.788989141243473, 0.8078785221990765, 0.7555462713420953,
0.8361052138935441, 0.7195542055115057, 0.8592123871172533, 0.6812759034843933,
0.8771159986952748, 0.6412243940605555, 0.8902481006481506, 0.5999743595282084,
0.8992952868651163, 0.5580032282594118, 0.9050110573289222, 0.5156548913779377,
0.908338439928252, 0.4731336721500472, 0.9104896075281127, 0.4305382486815422,
0.9124796341441906, 0.38798192678294363, 0.18465941635742913, 0.35063191749632183,
0.24110421889338157, 0.31190394310826886, 0.3003235400132397, 0.30828189837331976,
0.3603094923651325, 0.3135606490643205, 0.4171060234289877, 0.32433417646045615,
0.416842139562573, 0.3526729965541497, 0.36011177591813404, 0.3439660526998693,
0.3000863121140166, 0.33890077494044946, 0.24116055928407834, 0.34065620413845005,
0.5709736930161899, 0.321407825750195, 0.6305694459247149, 0.30972642336729495,
0.6895161625920927, 0.3036453838462943, 0.7488591859761683, 0.3069143844433495,
0.8030471337135181, 0.3435156012309415, 0.7485083446528741, 0.3348759588212388,
0.6893025057931884, 0.33403402013776456, 0.6304822892126991, 0.34038458762875695,
0.5710009285609654, 0.34988479902594455, 0.4954171902473609, 0.40202330022004634,
0.49604903449415433, 0.4592869389138444, 0.49644391662771625, 0.5162862508677217,
0.4981161256057368, 0.5703284628419502, 0.40749001573145566, 0.5983629921847019,
0.4537396729649631, 0.6057169923583451, 0.5007345777827058, 0.6116695615531077,
0.5448481727980428, 0.6044131443745976, 0.5882140504891681, 0.5961738788380111,
0.24303324896316683, 0.40721003719912746, 0.27771706732644313, 0.3907171413930685,
0.31847706697401107, 0.38417234007271117, 0.3621792860449715, 0.3900847721320633,
0.3965299162804086, 0.41071434661355205, 0.3586805562211872, 0.4203724421417311,
0.31847860588240934, 0.4237674602252073, 0.2789458001651631, 0.41942757306509065,
0.5938514626567266, 0.4090628827047304, 0.6303565516542536, 0.3864501652756091,
0.6774844732813035, 0.3809319896905685, 0.7150854850525555, 0.3875173254527522,
0.747519807465081, 0.4025187328459307, 0.7155172856447009, 0.4145958479293519,
0.680051949453018, 0.420041513473271, 0.6359056750107122, 0.41803782782566573,
0.33916483987223056, 0.6968581311227738, 0.40008790639758807, 0.6758101185779204,
0.47181947887764153, 0.6678850445191217, 0.5025394453374782, 0.6682917934792593,
0.5337748367911458, 0.6671949030019636, 0.6015915330083903, 0.6742535357237751,
0.6587068892667173, 0.6932163943648724, 0.6192795131720007, 0.7283129162844936,
0.5665923267827963, 0.7550248076404299, 0.5031303335863617, 0.7648348885181623,
0.4371030429958871, 0.7572539606688756, 0.3814909500115824, 0.7320595346122074,
0.35129809553480984, 0.6986839074746692, 0.4247987356100664, 0.69127609583798,
0.5027677238758598, 0.6911145821740593, 0.576997542122097, 0.6896269708051024,
0.6471352843446794, 0.6948977432227927, 0.5799932528781817, 0.7185288017567538,
0.5024914756021335, 0.7285408331555782, 0.4218115644247556, 0.7209126133193829,
0.3219750495122499, 0.40376441481225156, 0.6751136343101699, 0.40023415216110797,
)
# fmt: on
def _build_neighbor_indices(meanface: np.ndarray, num_nb: int) -> tuple[list[int], list[int], int]:
num_lms = meanface.shape[0]
meanface_indices: list[list[int]] = []
for i in range(num_lms):
pt = meanface[i]
dists = np.sum((pt - meanface) ** 2, axis=1)
indices = np.argsort(dists)
meanface_indices.append(indices[1 : 1 + num_nb].tolist())
reversed_map: dict[int, tuple[list[int], list[int]]] = {i: ([], []) for i in range(num_lms)}
for i in range(num_lms):
for j in range(num_nb):
neighbor = meanface_indices[i][j]
reversed_map[neighbor][0].append(i)
reversed_map[neighbor][1].append(j)
max_len = max(len(reversed_map[i][0]) for i in range(num_lms))
reverse_index1: list[int] = []
reverse_index2: list[int] = []
for i in range(num_lms):
idx1, idx2 = reversed_map[i]
# Pad by cycling existing entries so every landmark row has exactly max_len reverse-neighbor slots.
pad1 = (idx1 * max_len)[: max_len - len(idx1)]
pad2 = (idx2 * max_len)[: max_len - len(idx2)]
reverse_index1.extend(idx1 + pad1)
reverse_index2.extend(idx2 + pad2)
return reverse_index1, reverse_index2, max_len
def get_meanface_info(num_lms: int, num_nb: int = 10) -> tuple[np.ndarray, np.ndarray, int]:
"""Precomputed reverse-index tables for PIPNet decoding.
Args:
num_lms: 68 (300W) or 98 (WFLW).
num_nb: Neighbor count used at training time.
Returns:
``(reverse_index1, reverse_index2, max_len)``.
Raises:
ValueError: If ``num_lms`` does not match a shipped meanface table.
"""
if num_lms == 68:
flat = MEANFACE_300W_68
elif num_lms == 98:
flat = MEANFACE_WFLW_98
else:
raise ValueError(f'No meanface table available for num_lms={num_lms}; expected 68 or 98.')
meanface = np.asarray(flat, dtype=np.float32).reshape(-1, 2)
assert meanface.shape[0] == num_lms, f'meanface mismatch: expected {num_lms} points, got {meanface.shape[0]}'
r1, r2, max_len = _build_neighbor_indices(meanface, num_nb)
return np.asarray(r1, dtype=np.int64), np.asarray(r2, dtype=np.int64), max_len
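As a sanity check on the shapes these helpers produce (illustrative only; the private module path comes from the import in pipnet.py below):

import numpy as np
from uniface.landmark._meanface import get_meanface_info

r1, r2, max_len = get_meanface_info(num_lms=98, num_nb=10)
assert r1.shape == r2.shape == (98 * max_len,)  # one max_len-slot row per landmark, flattened
assert int(r1.max()) < 98 and int(r2.max()) < 10  # landmark indices and neighbor slots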

240
uniface/landmark/pipnet.py Normal file
View File

@@ -0,0 +1,240 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from __future__ import annotations
import cv2
import numpy as np
from uniface.constants import PIPNetWeights
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.onnx_utils import create_onnx_session
from ._meanface import get_meanface_info
from .base import BaseLandmarker
__all__ = ['PIPNet']
# PIPNet's upstream training preprocessing pads the bbox asymmetrically:
# +10% on the left, right, and bottom and -10% on the top before cropping.
_BBOX_PAD_RATIO = 0.1
class PIPNet(BaseLandmarker):
"""PIPNet facial landmark detector (98 or 68 points).
PIPNet (Pixel-in-Pixel Net) detects landmarks via a heatmap classification
head plus per-pixel offset and neighbor regression heads. The neighbor
predictions are gathered through a reverse-index table built from a
precomputed meanface and then averaged with each landmark's own
prediction for sub-pixel accuracy.
Both the WFLW (98 points) and 300W+CelebA (68 points) variants share the
same ResNet-18 backbone and 256x256 input. The number of landmarks is
inferred from the ONNX output shape and the corresponding meanface table
is selected automatically.
Args:
model_name (PIPNetWeights): Which PIPNet ONNX model to load.
Defaults to ``PIPNetWeights.WFLW_98``.
providers (list[str] | None): ONNX Runtime execution providers. If None,
auto-detects the best available provider.
Example:
>>> from uniface.landmark import PIPNet
>>> from uniface.constants import PIPNetWeights
>>>
>>> landmarker = PIPNet() # WFLW_98 by default
>>> landmarks = landmarker.get_landmarks(image, bbox)
>>> print(landmarks.shape)
(98, 2)
>>>
>>> landmarker_68 = PIPNet(model_name=PIPNetWeights.DW300_CELEBA_68)
>>> landmarks_68 = landmarker_68.get_landmarks(image, bbox)
>>> print(landmarks_68.shape)
(68, 2)
"""
def __init__(
self,
model_name: PIPNetWeights = PIPNetWeights.WFLW_98,
providers: list[str] | None = None,
) -> None:
Logger.info(f'Initializing PIPNet with model={model_name}')
self.providers = providers
self.input_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
self.input_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
# Number of meanface neighbors used at training time.
self.num_neighbors = 10
self.model_path = verify_model_weights(model_name)
self._initialize_model()
def _initialize_model(self) -> None:
"""Initialize the ONNX model and precompute the meanface tables.
Raises:
RuntimeError: If the model fails to load or initialize.
ValueError: If the model output channel count does not match a
supported meanface table (expected 68 or 98 landmarks).
"""
try:
self.session = create_onnx_session(self.model_path, providers=self.providers)
input_meta = self.session.get_inputs()[0]
self.input_name = input_meta.name
_, _, self.input_h, self.input_w = input_meta.shape
outputs = self.session.get_outputs()
self.output_names = [o.name for o in outputs]
cls_shape = outputs[0].shape # (1, num_lms, feat_h, feat_w)
self.num_lms = int(cls_shape[1])
self.feat_h = int(cls_shape[2]) if isinstance(cls_shape[2], int) else self.input_h // 32
self.feat_w = int(cls_shape[3]) if isinstance(cls_shape[3], int) else self.input_w // 32
self.net_stride = self.input_h // self.feat_h
self._reverse_index1, self._reverse_index2, self._max_len = get_meanface_info(
self.num_lms, self.num_neighbors
)
Logger.info(f'Model initialized with {self.num_lms} landmarks ({self.input_h}x{self.input_w} input)')
except ValueError:
raise
except Exception as e:
Logger.error(f"Failed to load PIPNet model from '{self.model_path}'", exc_info=True)
raise RuntimeError(f'Failed to initialize PIPNet model: {e}') from e
def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> tuple[np.ndarray, tuple[int, int, int, int]]:
"""Crop the face region and produce the network input blob.
The crop follows the upstream PIPNet convention: pad ``+10%`` on the
left, right, and bottom of the bbox and ``-10%`` on the top, then
clamp to the image bounds. The crop is resized to the model's input
resolution, BGR->RGB converted, and ImageNet-normalized.
Args:
image (np.ndarray): Full source image in BGR format, ``(H, W, 3)``.
bbox (np.ndarray): Face bounding box ``[x1, y1, x2, y2]``.
Returns:
Tuple of:
- The preprocessed ``(1, 3, H, W)`` float32 blob.
- The crop metadata ``(x1, y1, crop_w, crop_h)`` used to
rescale predictions back to original image coordinates.
"""
crop, crop_meta = self._crop_face(image, bbox)
resized = cv2.resize(crop, (self.input_w, self.input_h))
rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
rgb = (rgb - self.input_mean) / self.input_std
blob = np.transpose(rgb, (2, 0, 1))[None, ...]
return np.ascontiguousarray(blob, dtype=np.float32), crop_meta
def _crop_face(self, image: np.ndarray, bbox: np.ndarray) -> tuple[np.ndarray, tuple[int, int, int, int]]:
img_h, img_w = image.shape[:2]
x1, y1, x2, y2 = (float(v) for v in bbox[:4])
det_w = x2 - x1 + 1
det_h = y2 - y1 + 1
# Asymmetric: +10% left/right/bottom, -10% top.
x1 -= int(det_w * _BBOX_PAD_RATIO)
y1 += int(det_h * _BBOX_PAD_RATIO)
x2 += int(det_w * _BBOX_PAD_RATIO)
y2 += int(det_h * _BBOX_PAD_RATIO)
x1 = max(int(x1), 0)
y1 = max(int(y1), 0)
x2 = min(int(x2), img_w - 1)
y2 = min(int(y2), img_h - 1)
crop_w = x2 - x1 + 1
crop_h = y2 - y1 + 1
crop = image[y1 : y2 + 1, x1 : x2 + 1, :]
return crop, (x1, y1, crop_w, crop_h)
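A worked example of the asymmetric padding (numbers chosen for illustration): for a 100x100 detection at [100, 100, 199, 199] well inside a large image, the pad is int(100 * 0.1) = 10 px, so:

x1, y1, x2, y2 = 100 - 10, 100 + 10, 199 + 10, 199 + 10   # 90, 110, 209, 209
crop_w, crop_h = x2 - x1 + 1, y2 - y1 + 1                  # 120, 100
# The top edge moves down (y1 grows), trimming the forehead; the other edges expand.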
def postprocess(
self,
outputs: tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray],
crop_meta: tuple[int, int, int, int],
) -> np.ndarray:
"""Decode raw network outputs into original-image landmark coordinates.
Combines each landmark's own (cls, offset) prediction with the
predictions made about it by its ``num_nb`` nearest meanface neighbors,
then maps the normalized result back to the original image using the
crop metadata.
Args:
outputs (tuple): The five raw ONNX outputs in order
``(cls_map, offset_x, offset_y, nb_x, nb_y)``.
crop_meta (tuple): The ``(x1, y1, crop_w, crop_h)`` returned by
:meth:`preprocess`.
Returns:
np.ndarray: ``(num_lms, 2)`` float32 landmarks in original image space.
"""
cls_map, offset_x, offset_y, nb_x, nb_y = outputs
n = self.num_lms
nb = self.num_neighbors
h = self.feat_h
w = self.feat_w
cls_flat = cls_map.reshape(n, h * w)
max_ids = np.argmax(cls_flat, axis=1)
cols = (max_ids % w).astype(np.float32)
rows = (max_ids // w).astype(np.float32)
off_x_flat = offset_x.reshape(n, h * w)
off_y_flat = offset_y.reshape(n, h * w)
own_x = np.take_along_axis(off_x_flat, max_ids[:, None], axis=1).squeeze(1)
own_y = np.take_along_axis(off_y_flat, max_ids[:, None], axis=1).squeeze(1)
# The neighbor heads emit num_lms * num_nb channels, with each landmark's num_nb channels contiguous; reshape so axis 1 is the neighbor index.
nb_x_flat = nb_x.reshape(n, nb, h * w)
nb_y_flat = nb_y.reshape(n, nb, h * w)
nb_ids = np.broadcast_to(max_ids[:, None, None], (n, nb, 1))
nb_own_x = np.take_along_axis(nb_x_flat, nb_ids, axis=2).squeeze(2)
nb_own_y = np.take_along_axis(nb_y_flat, nb_ids, axis=2).squeeze(2)
scale_x = self.input_w / self.net_stride
scale_y = self.input_h / self.net_stride
pred_x = (cols + own_x) / scale_x
pred_y = (rows + own_y) / scale_y
nb_pred_x = (cols[:, None] + nb_own_x) / scale_x
nb_pred_y = (rows[:, None] + nb_own_y) / scale_y
# Reverse gather: collect predictions about landmark i made by its neighbors,
# then average with landmark i's own prediction.
rev_x = nb_pred_x.reshape(-1)[self._reverse_index1 * nb + self._reverse_index2].reshape(n, self._max_len)
rev_y = nb_pred_y.reshape(-1)[self._reverse_index1 * nb + self._reverse_index2].reshape(n, self._max_len)
merged_x = np.mean(np.concatenate([pred_x[:, None], rev_x], axis=1), axis=1)
merged_y = np.mean(np.concatenate([pred_y[:, None], rev_y], axis=1), axis=1)
x1, y1, crop_w, crop_h = crop_meta
merged_x = merged_x * crop_w + x1
merged_y = merged_y * crop_h + y1
return np.stack([merged_x, merged_y], axis=1).astype(np.float32)
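To see the coordinate arithmetic in isolation, a toy decode of a single landmark's own (cls, offset) path, without the neighbor averaging (all values made up):

import numpy as np

feat_h = feat_w = 8                               # e.g. 256 input / stride 32
cls_map = np.zeros((1, feat_h, feat_w), dtype=np.float32)
cls_map[0, 3, 5] = 1.0                            # peak at row 3, col 5
max_id = cls_map.reshape(1, -1).argmax(axis=1)    # 3 * 8 + 5 = 29
col, row = max_id % feat_w, max_id // feat_w      # 5, 3
x = (col + 0.25) / feat_w                         # own offset_x = 0.25 -> 0.65625 (crop-normalized)
y = (row - 0.5) / feat_h                          # own offset_y = -0.5 -> 0.3125
# postprocess() then maps back to pixels with x * crop_w + x1 and y * crop_h + y1.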
def get_landmarks(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
"""Predict facial landmarks for the given face bounding box.
Args:
image (np.ndarray): Full source image in BGR format.
bbox (np.ndarray): Face bounding box ``[x1, y1, x2, y2]``.
Returns:
np.ndarray: Landmark points as a ``(num_lms, 2)`` float32 array
in the original image's pixel coordinates.
"""
blob, crop_meta = self.preprocess(image, bbox)
outputs = self.session.run(self.output_names, {self.input_name: blob})
return self.postprocess(outputs, crop_meta)
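End to end, the class pairs with any detector that yields [x1, y1, x2, y2] boxes; a visualization sketch (the file name and bbox are placeholders):

import cv2
import numpy as np
from uniface.landmark import PIPNet

image = cv2.imread('face.jpg')
bbox = np.array([120, 80, 360, 340])  # substitute your detector's output
for x, y in PIPNet().get_landmarks(image, bbox):
    cv2.circle(image, (int(x), int(y)), 2, (0, 255, 0), -1)
cv2.imwrite('face_landmarks.jpg', image)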

View File

@@ -2,12 +2,6 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""Logging utilities for UniFace.
This module provides a centralized logger for the UniFace library,
allowing users to enable verbose logging when debugging or developing.
"""
from __future__ import annotations
import logging

View File

@@ -2,12 +2,6 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""Model weight management for UniFace.
This module handles downloading, caching, and verifying model weights
using SHA-256 checksums for integrity validation.
"""
from __future__ import annotations
from concurrent.futures import ThreadPoolExecutor, as_completed
@@ -159,7 +153,7 @@ def download_file(url: str, dest_path: str, timeout: int = 60, max_retries: int
unit_divisor=1024,
) as progress,
):
for chunk in response.iter_content(chunk_size=const.CHUNK_SIZE):
for chunk in response.iter_content(chunk_size=const.DOWNLOAD_CHUNK_SIZE):
if chunk:
file.write(chunk)
progress.update(len(chunk))
@@ -178,7 +172,7 @@ def verify_file_hash(file_path: str, expected_hash: str) -> bool:
"""Compute the SHA-256 hash of the file and compare it with the expected hash."""
file_hash = hashlib.sha256()
with open(file_path, 'rb') as f:
for chunk in iter(lambda: f.read(const.CHUNK_SIZE), b''):
for chunk in iter(lambda: f.read(const.HASH_CHUNK_SIZE), b''):
file_hash.update(chunk)
actual_hash = file_hash.hexdigest()
if actual_hash != expected_hash:
@@ -187,7 +181,7 @@ def verify_file_hash(file_path: str, expected_hash: str) -> bool:
def download_models(
model_names: list[Enum], max_workers: int = 4, timeout: int = 60, max_retries: int = 3
model_names: list[Enum], max_workers: int | None = None, timeout: int = 60, max_retries: int = 3
) -> dict[Enum, str]:
"""Download and verify multiple models concurrently.
@@ -214,6 +208,17 @@ def download_models(
results: dict[Enum, str] = {}
errors: list[str] = []
if isinstance(max_workers, bool) or not isinstance(max_workers, int | None):
raise TypeError(f'max_workers must be int or None, got {type(max_workers).__name__}')
if max_workers is None or max_workers < 1:
if max_workers < 1:
Logger.info(f'max_workers must be >= 1, got {max_workers}; falling back to auto mode')
max_workers = min(os.cpu_count() or 1, 8)  # at most 8; os.cpu_count() can return None
if max_workers < 1:
raise ValueError(f'max_workers must be >= 1, got {max_workers}')
with ThreadPoolExecutor(max_workers=max_workers) as executor:
future_to_model = {
executor.submit(verify_model_weights, name, timeout=timeout, max_retries=max_retries): name
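With the new auto mode, callers can omit max_workers entirely; a usage sketch using the weights added in this diff:

from uniface.constants import PIPNetWeights
from uniface.model_store import download_models

# max_workers=None now falls back to min(cpu_count, 8).
paths = download_models([PIPNetWeights.WFLW_98, PIPNetWeights.DW300_CELEBA_68])
for model, path in paths.items():
    print(model, '->', path)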

View File

@@ -6,6 +6,7 @@
import cv2
import numpy as np
from uniface.common import softmax
from uniface.constants import MiniFASNetWeights
from uniface.log import Logger
from uniface.model_store import verify_model_weights
@@ -179,11 +180,6 @@ class MiniFASNet(BaseSpoofer):
return face
def _softmax(self, x: np.ndarray) -> np.ndarray:
"""Apply softmax to logits along axis 1."""
e_x = np.exp(x - np.max(x, axis=1, keepdims=True))
return e_x / e_x.sum(axis=1, keepdims=True)
def postprocess(self, outputs: np.ndarray) -> SpoofingResult:
"""
Postprocess raw model outputs into prediction result.
@@ -197,7 +193,7 @@ class MiniFASNet(BaseSpoofer):
Returns:
SpoofingResult: Result containing is_real flag and confidence score.
"""
probs = self._softmax(outputs)
probs = softmax(outputs)
label_idx = int(np.argmax(probs))
confidence = float(probs[0, label_idx])
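The shared helper itself is not part of this hunk; judging from the removed method, uniface.common.softmax is presumably equivalent to this sketch:

import numpy as np

def softmax(x: np.ndarray, axis: int = 1) -> np.ndarray:
    # Subtract the max first for numerical stability.
    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e_x / e_x.sum(axis=axis, keepdims=True)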

View File

@@ -115,7 +115,7 @@ class FAISS(BaseStore):
return None, similarity
def remove(self, key: str, value: Any) -> int:
"""Remove all entries where ``metadata[key] == value`` and rebuild.
"""Remove all entries where ``metadata[key] == value``.
Args:
key: Metadata key to match against.
@@ -126,22 +126,19 @@ class FAISS(BaseStore):
"""
faiss = _import_faiss()
keep = [i for i, m in enumerate(self.metadata) if m.get(key) != value]
removed = len(self.metadata) - len(keep)
if removed == 0:
to_remove = [i for i, m in enumerate(self.metadata) if m.get(key) == value]
if not to_remove:
return 0
if keep:
vectors = np.empty((len(keep), self.embedding_size), dtype=np.float32)
for dst, src in enumerate(keep):
self.index.reconstruct(src, vectors[dst])
new_index = faiss.IndexFlatIP(self.embedding_size)
new_index.add(vectors)
else:
new_index = faiss.IndexFlatIP(self.embedding_size)
ids = np.array(to_remove, dtype=np.int64)
self.index.remove_ids(faiss.IDSelectorBatch(ids))
self.index = new_index
self.metadata = [self.metadata[i] for i in keep]
# IndexFlatIP.remove_ids preserves the relative order of survivors,
# so deleting the same positions from metadata keeps them aligned.
drop = set(to_remove)
self.metadata = [m for i, m in enumerate(self.metadata) if i not in drop]
removed = len(to_remove)
Logger.info('Removed %d entries where %s=%s (%d remaining)', removed, key, value, self.index.ntotal)
return removed
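A minimal standalone check of the new removal path, confirming that remove_ids keeps survivors in their original order so a parallel metadata list stays aligned (toy data; requires faiss-cpu):

import faiss
import numpy as np

index = faiss.IndexFlatIP(4)
index.add(np.eye(4, dtype=np.float32))
metadata = [{'name': n} for n in ('a', 'b', 'a', 'c')]

to_remove = [i for i, m in enumerate(metadata) if m.get('name') == 'a']  # [0, 2]
index.remove_ids(faiss.IDSelectorBatch(np.array(to_remove, dtype=np.int64)))
drop = set(to_remove)
metadata = [m for i, m in enumerate(metadata) if i not in drop]

assert index.ntotal == len(metadata) == 2  # 'b' and 'c' remain, still index-aligned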

1769
uv.lock generated Normal file

File diff suppressed because it is too large