Compare commits
41 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7882ec5cb4 | ||
|
|
d51d030545 | ||
|
|
5a767847da | ||
|
|
4a22f903f0 | ||
|
|
43a46e11df | ||
|
|
025b93ab8b | ||
|
|
8bf87d958f | ||
|
|
b813dc2ee7 | ||
|
|
73fc291930 | ||
|
|
400bb72217 | ||
|
|
a0a12d5eca | ||
|
|
a34f376da0 | ||
|
|
2b29706615 | ||
|
|
f6d3cf33f0 | ||
|
|
0eb042425c | ||
|
|
35c0b6d539 | ||
|
|
13c4ac83d8 | ||
|
|
6ce397b811 | ||
|
|
9bf54f5f78 | ||
|
|
c87ec1ad0f | ||
|
|
9e56a86963 | ||
|
|
426bd71505 | ||
|
|
ede8b27091 | ||
|
|
02c77ce5db | ||
|
|
d70d6a254f | ||
|
|
7d37633b1a | ||
|
|
bc413df4a8 | ||
|
|
8db0577991 | ||
|
|
3682a2124f | ||
|
|
2ef6a1ebe8 | ||
|
|
78a2dba7c7 | ||
|
|
87e496d1f5 | ||
|
|
5604ebf4f1 | ||
|
|
971775b2e8 | ||
|
|
c520ea2df2 | ||
|
|
2a8cb54d31 | ||
|
|
331f46be7c | ||
|
|
9991fae62a | ||
|
|
b74ab95d39 | ||
|
|
d2b0303bfe | ||
|
|
5f74487eb3 |
BIN
.github/logos/gaze_crop.png
vendored
|
Before Width: | Height: | Size: 716 KiB |
BIN
.github/logos/gaze_org.png
vendored
|
Before Width: | Height: | Size: 673 KiB |
BIN
.github/logos/logo_preview.jpg
vendored
|
Before Width: | Height: | Size: 826 KiB |
BIN
.github/logos/logo_readme.png
vendored
|
Before Width: | Height: | Size: 563 KiB |
BIN
.github/logos/logo_web.webp
vendored
|
Before Width: | Height: | Size: 33 KiB |
BIN
.github/logos/uniface_enhanced.webp
vendored
Normal file
|
After Width: | Height: | Size: 427 KiB |
BIN
.github/logos/uniface_high_res_original.png
vendored
Normal file
|
After Width: | Height: | Size: 1.7 MiB |
BIN
.github/logos/uniface_rounded.png
vendored
Normal file
|
After Width: | Height: | Size: 1.8 MiB |
BIN
.github/logos/uniface_rounded_150px.png
vendored
Normal file
|
After Width: | Height: | Size: 1.9 MiB |
BIN
.github/logos/uniface_rounded_q80.png
vendored
Normal file
|
After Width: | Height: | Size: 872 KiB |
BIN
.github/logos/uniface_rounded_q80.webp
vendored
Normal file
|
After Width: | Height: | Size: 62 KiB |
35
.github/workflows/ci.yml
vendored
@@ -17,10 +17,10 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: "3.10"
|
||||
python-version: "3.11"
|
||||
- uses: pre-commit/action@v3.0.1
|
||||
|
||||
test:
|
||||
@@ -35,8 +35,14 @@ jobs:
|
||||
# Full Python range on Linux (fastest runner)
|
||||
- os: ubuntu-latest
|
||||
python-version: "3.10"
|
||||
- os: ubuntu-latest
|
||||
python-version: "3.11"
|
||||
- os: ubuntu-latest
|
||||
python-version: "3.12"
|
||||
- os: ubuntu-latest
|
||||
python-version: "3.13"
|
||||
- os: ubuntu-latest
|
||||
python-version: "3.14"
|
||||
- os: macos-latest
|
||||
python-version: "3.13"
|
||||
- os: windows-latest
|
||||
@@ -44,28 +50,25 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v6
|
||||
with:
|
||||
enable-cache: true
|
||||
python-version: ${{ matrix.python-version }}
|
||||
cache: "pip"
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install .[dev]
|
||||
run: uv sync --locked --extra cpu --extra dev
|
||||
|
||||
- name: Check ONNX Runtime providers
|
||||
run: |
|
||||
python -c "import onnxruntime as ort; print('Available providers:', ort.get_available_providers())"
|
||||
run: uv run python -c "import onnxruntime as ort; print('Available providers:', ort.get_available_providers())"
|
||||
|
||||
- name: Run tests
|
||||
run: pytest -v --tb=short
|
||||
run: uv run pytest -v --tb=short
|
||||
|
||||
- name: Test package imports
|
||||
run: python -c "import uniface; print(f'uniface {uniface.__version__} loaded with {len(uniface.__all__)} exports')"
|
||||
run: uv run python -c "import uniface; print(f'uniface {uniface.__version__} loaded with {len(uniface.__all__)} exports')"
|
||||
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -74,10 +77,10 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: "3.11"
|
||||
cache: "pip"
|
||||
|
||||
20
.github/workflows/docs.yml
vendored
@@ -1,8 +1,6 @@
|
||||
name: Deploy docs
|
||||
name: Deploy Documentation
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
@@ -12,26 +10,28 @@ jobs:
|
||||
deploy:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v5
|
||||
with:
|
||||
fetch-depth: 0 # Fetch full history for git-committers and git-revision-date plugins
|
||||
fetch-depth: 0
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v6
|
||||
with:
|
||||
enable-cache: true
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install mkdocs-material pymdown-extensions mkdocs-git-committers-plugin-2 mkdocs-git-revision-date-localized-plugin
|
||||
run: uv sync --locked --extra docs
|
||||
|
||||
- name: Build docs
|
||||
env:
|
||||
MKDOCS_GIT_COMMITTERS_APIKEY: ${{ secrets.MKDOCS_GIT_COMMITTERS_APIKEY }}
|
||||
run: mkdocs build --strict
|
||||
run: uv run mkdocs build --strict
|
||||
|
||||
- name: Deploy to GitHub Pages
|
||||
uses: peaceiris/actions-gh-pages@v4
|
||||
env:
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
||||
with:
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
publish_dir: ./site
|
||||
|
||||
229
.github/workflows/pipeline.yml
vendored
Normal file
@@ -0,0 +1,229 @@
|
||||
name: Release Pipeline
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
version:
|
||||
description: 'Version (e.g. 3.6.0, 3.6.0b1, 3.6.0rc1)'
|
||||
required: true
|
||||
|
||||
concurrency:
|
||||
group: pipeline
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
validate:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
outputs:
|
||||
is_prerelease: ${{ steps.prerelease.outputs.is_prerelease }}
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Validate version (PEP 440)
|
||||
run: |
|
||||
python - <<'EOF'
|
||||
import re, sys
|
||||
v = "${{ inputs.version }}"
|
||||
if not re.fullmatch(r'\d+\.\d+\.\d+((a|b|rc)\d+|\.dev\d+)?', v):
|
||||
print(f"Invalid version: {v}")
|
||||
print("Expected forms: 3.6.0, 3.6.0a1, 3.6.0b1, 3.6.0rc1, 3.6.0.dev1")
|
||||
sys.exit(1)
|
||||
EOF
|
||||
|
||||
- name: Check tag does not exist
|
||||
run: |
|
||||
if git rev-parse "v${{ inputs.version }}" >/dev/null 2>&1; then
|
||||
echo "Tag v${{ inputs.version }} already exists."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Detect pre-release
|
||||
id: prerelease
|
||||
run: |
|
||||
if [[ "${{ inputs.version }}" =~ (a|b|rc|\.dev)[0-9]+ ]]; then
|
||||
echo "is_prerelease=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "is_prerelease=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 15
|
||||
needs: validate
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v6
|
||||
with:
|
||||
enable-cache: true
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: uv sync --locked --extra cpu --extra dev
|
||||
|
||||
- name: Run tests
|
||||
run: uv run pytest -v --tb=short
|
||||
|
||||
release:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
needs: test
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v5
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.RELEASE_TOKEN }}
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Update pyproject.toml
|
||||
run: |
|
||||
python - <<'EOF'
|
||||
import re, pathlib
|
||||
p = pathlib.Path('pyproject.toml')
|
||||
text = p.read_text()
|
||||
new = re.sub(r'^version\s*=\s*".*"', f'version = "${{ inputs.version }}"', text, count=1, flags=re.M)
|
||||
if new == text:
|
||||
raise SystemExit("Failed to update version in pyproject.toml")
|
||||
p.write_text(new)
|
||||
EOF
|
||||
|
||||
- name: Update uniface/__init__.py
|
||||
run: |
|
||||
python - <<'EOF'
|
||||
import re, pathlib
|
||||
p = pathlib.Path('uniface/__init__.py')
|
||||
text = p.read_text()
|
||||
new = re.sub(r"^__version__\s*=\s*'.*'", f"__version__ = '${{ inputs.version }}'", text, count=1, flags=re.M)
|
||||
if new == text:
|
||||
raise SystemExit("Failed to update __version__ in uniface/__init__.py")
|
||||
p.write_text(new)
|
||||
EOF
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v6
|
||||
with:
|
||||
enable-cache: true
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Refresh uv.lock with new project version
|
||||
run: uv lock --upgrade-package uniface
|
||||
|
||||
- name: Commit, tag, push
|
||||
run: |
|
||||
git config user.name "github-actions[bot]"
|
||||
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
||||
git add pyproject.toml uniface/__init__.py uv.lock
|
||||
git commit -m "chore: Release v${{ inputs.version }}"
|
||||
git tag "v${{ inputs.version }}"
|
||||
git push origin HEAD:${{ github.ref_name }}
|
||||
git push origin "v${{ inputs.version }}"
|
||||
|
||||
publish:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
needs: [validate, release]
|
||||
permissions:
|
||||
contents: write
|
||||
id-token: write
|
||||
environment:
|
||||
name: pypi
|
||||
url: https://pypi.org/project/uniface/
|
||||
|
||||
steps:
|
||||
- name: Checkout tag
|
||||
uses: actions/checkout@v5
|
||||
with:
|
||||
ref: v${{ inputs.version }}
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: "3.11"
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install build tools
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install build twine
|
||||
|
||||
- name: Build package
|
||||
run: python -m build
|
||||
|
||||
- name: Check package
|
||||
run: twine check dist/*
|
||||
|
||||
- name: Publish to PyPI
|
||||
env:
|
||||
TWINE_USERNAME: __token__
|
||||
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
|
||||
run: twine upload dist/*
|
||||
|
||||
- name: Create GitHub Release
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
tag_name: v${{ inputs.version }}
|
||||
files: dist/*
|
||||
generate_release_notes: true
|
||||
prerelease: ${{ needs.validate.outputs.is_prerelease }}
|
||||
|
||||
docs:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
needs: [validate, publish]
|
||||
if: needs.validate.outputs.is_prerelease == 'false'
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
steps:
|
||||
- name: Checkout tag
|
||||
uses: actions/checkout@v5
|
||||
with:
|
||||
ref: v${{ inputs.version }}
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v6
|
||||
with:
|
||||
enable-cache: true
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Install dependencies
|
||||
run: uv sync --locked --extra docs
|
||||
|
||||
- name: Build docs
|
||||
env:
|
||||
MKDOCS_GIT_COMMITTERS_APIKEY: ${{ secrets.MKDOCS_GIT_COMMITTERS_APIKEY }}
|
||||
run: uv run mkdocs build --strict
|
||||
|
||||
- name: Deploy to GitHub Pages
|
||||
uses: peaceiris/actions-gh-pages@v4
|
||||
env:
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
||||
with:
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
publish_dir: ./site
|
||||
destination_dir: docs
|
||||
119
.github/workflows/publish.yml
vendored
@@ -1,119 +0,0 @@
|
||||
name: Publish to PyPI
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- "v*.*.*" # Trigger only on version tags like v0.1.9
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
validate:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
outputs:
|
||||
version: ${{ steps.get_version.outputs.version }}
|
||||
tag_version: ${{ steps.get_version.outputs.tag_version }}
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11" # Needs 3.11+ for tomllib
|
||||
|
||||
- name: Get version from tag and pyproject.toml
|
||||
id: get_version
|
||||
run: |
|
||||
TAG_VERSION=${GITHUB_REF#refs/tags/v}
|
||||
echo "tag_version=$TAG_VERSION" >> $GITHUB_OUTPUT
|
||||
|
||||
PYPROJECT_VERSION=$(python -c "import tomllib; print(tomllib.load(open('pyproject.toml','rb'))['project']['version'])")
|
||||
echo "version=$PYPROJECT_VERSION" >> $GITHUB_OUTPUT
|
||||
|
||||
echo "Tag version: v$TAG_VERSION"
|
||||
echo "pyproject.toml version: $PYPROJECT_VERSION"
|
||||
|
||||
- name: Verify version match
|
||||
run: |
|
||||
if [ "${{ steps.get_version.outputs.tag_version }}" != "${{ steps.get_version.outputs.version }}" ]; then
|
||||
echo "Error: Tag version (${{ steps.get_version.outputs.tag_version }}) does not match pyproject.toml version (${{ steps.get_version.outputs.version }})"
|
||||
exit 1
|
||||
fi
|
||||
echo "Version validation passed: ${{ steps.get_version.outputs.version }}"
|
||||
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 15
|
||||
needs: validate
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.10", "3.13"]
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install .[dev]
|
||||
|
||||
- name: Run tests
|
||||
run: pytest -v
|
||||
|
||||
publish:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
needs: [validate, test]
|
||||
permissions:
|
||||
contents: write
|
||||
id-token: write
|
||||
environment:
|
||||
name: pypi
|
||||
url: https://pypi.org/project/uniface/
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install build tools
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install build twine
|
||||
|
||||
- name: Build package
|
||||
run: python -m build
|
||||
|
||||
- name: Check package
|
||||
run: twine check dist/*
|
||||
|
||||
- name: Publish to PyPI
|
||||
env:
|
||||
TWINE_USERNAME: __token__
|
||||
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
|
||||
run: twine upload dist/*
|
||||
|
||||
- name: Create GitHub Release
|
||||
uses: softprops/action-gh-release@v1
|
||||
with:
|
||||
files: dist/*
|
||||
generate_release_notes: true
|
||||
1
.gitignore
vendored
@@ -1,4 +1,5 @@
|
||||
tmp_*
|
||||
.vscode/
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
|
||||
@@ -18,6 +18,13 @@ repos:
|
||||
- id: debug-statements
|
||||
- id: check-ast
|
||||
|
||||
# Strip Jupyter notebook outputs
|
||||
- repo: https://github.com/kynan/nbstripout
|
||||
rev: 0.9.1
|
||||
hooks:
|
||||
- id: nbstripout
|
||||
files: ^examples/
|
||||
|
||||
# Ruff - Fast Python linter and formatter
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.14.10
|
||||
|
||||
6
AGENTS.md
Normal file
@@ -0,0 +1,6 @@
|
||||
<!-- Cursor agent instructions — shared with CLAUDE.md -->
|
||||
<!-- See CLAUDE.md for full project instructions for AI coding agents. -->
|
||||
|
||||
# AGENTS.md
|
||||
|
||||
Please read and follow all instructions in [CLAUDE.md](./CLAUDE.md).
|
||||
81
CLAUDE.md
Normal file
@@ -0,0 +1,81 @@
|
||||
# CLAUDE.md
|
||||
|
||||
Project instructions for AI coding agents.
|
||||
|
||||
## Project Overview
|
||||
|
||||
UniFace is a Python library for face detection, recognition, tracking, landmark analysis, face parsing, gaze estimation, age/gender detection. It uses ONNX Runtime for inference.
|
||||
|
||||
## Code Style
|
||||
|
||||
- Python 3.10+ with type hints
|
||||
- Line length: 120
|
||||
- Single quotes for strings, double quotes for docstrings
|
||||
- Google-style docstrings
|
||||
- Formatter/linter: Ruff (config in `pyproject.toml`)
|
||||
- Run `ruff format .` and `ruff check . --fix` before committing
|
||||
|
||||
## Commit Messages
|
||||
|
||||
Follow [Conventional Commits](https://www.conventionalcommits.org/) with a **capitalized** description:
|
||||
|
||||
```
|
||||
<type>: <Capitalized short description>
|
||||
```
|
||||
|
||||
Types: `feat`, `fix`, `docs`, `style`, `refactor`, `perf`, `test`, `build`, `ci`, `chore`
|
||||
|
||||
Examples:
|
||||
- `feat: Add gaze estimation model`
|
||||
- `fix: Correct bounding box scaling for non-square images`
|
||||
- `ci: Add nbstripout pre-commit hook`
|
||||
- `docs: Update installation instructions`
|
||||
- `refactor: Unify attribute/detector base classes`
|
||||
|
||||
## Testing
|
||||
|
||||
```bash
|
||||
pytest -v --tb=short
|
||||
```
|
||||
|
||||
Tests live in `tests/`. Run the full suite before submitting changes.
|
||||
|
||||
## Pre-commit
|
||||
|
||||
Pre-commit hooks handle formatting, linting, security checks, and notebook output stripping. Always run:
|
||||
|
||||
```bash
|
||||
pre-commit install
|
||||
pre-commit run --all-files
|
||||
```
|
||||
|
||||
## Project Structure
|
||||
|
||||
```
|
||||
uniface/ # Main package
|
||||
detection/ # Face detection models (SCRFD, RetinaFace, YOLOv5, YOLOv8)
|
||||
recognition/ # Face recognition/verification (AdaFace, ArcFace, EdgeFace, MobileFace, SphereFace)
|
||||
landmark/ # Facial landmark models
|
||||
tracking/ # Object tracking (ByteTrack)
|
||||
parsing/ # Face parsing/segmentation (BiSeNet, XSeg)
|
||||
gaze/ # Gaze estimation
|
||||
headpose/ # Head pose estimation
|
||||
attribute/ # Age, gender, emotion detection
|
||||
spoofing/ # Anti-spoofing (MiniFASNet)
|
||||
privacy/ # Face anonymization
|
||||
stores/ # Vector stores (FAISS)
|
||||
constants.py # Model weight URLs and checksums
|
||||
model_store.py # Model download/cache management
|
||||
analyzer.py # High-level FaceAnalyzer API
|
||||
types.py # Shared type definitions
|
||||
tests/ # Unit tests
|
||||
examples/ # Jupyter notebooks (outputs are auto-stripped)
|
||||
docs/ # MkDocs documentation
|
||||
```
|
||||
|
||||
## Key Conventions
|
||||
|
||||
- New models: add class in submodule, register weights in `constants.py`, export in `__init__.py`
|
||||
- Dependencies: managed in `pyproject.toml`
|
||||
- All ONNX models are downloaded on demand with SHA256 verification
|
||||
- Do not commit notebook outputs; `nbstripout` pre-commit hook handles this
|
||||
@@ -21,25 +21,31 @@ Thank you for considering contributing to UniFace! We welcome contributions of a
|
||||
|
||||
## Development Setup
|
||||
|
||||
We use [uv](https://docs.astral.sh/uv/) for reproducible dev installs. The committed `uv.lock` pins every transitive dependency so contributors and CI resolve to identical versions.
|
||||
|
||||
```bash
|
||||
# Install uv (https://docs.astral.sh/uv/getting-started/installation/)
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
pip install -e ".[dev]"
|
||||
|
||||
# Sync runtime + cpu + dev extras from uv.lock (use --extra gpu instead of cpu for CUDA)
|
||||
uv sync --extra cpu --extra dev
|
||||
```
|
||||
|
||||
`uv sync` creates a project-local `.venv/` and installs everything pinned in `uv.lock`. Run commands with `uv run <cmd>` (e.g. `uv run pytest`), or activate the venv with `source .venv/bin/activate`.
|
||||
|
||||
### Setting Up Pre-commit Hooks
|
||||
|
||||
We use [pre-commit](https://pre-commit.com/) to ensure code quality and consistency. Install and configure it:
|
||||
We use [pre-commit](https://pre-commit.com/) to ensure code quality and consistency. `pre-commit` is included in the `[dev]` extra, so it's already installed after `uv sync`.
|
||||
|
||||
```bash
|
||||
# Install pre-commit
|
||||
pip install pre-commit
|
||||
|
||||
# Install the git hooks
|
||||
pre-commit install
|
||||
uv run pre-commit install
|
||||
|
||||
# (Optional) Run against all files
|
||||
pre-commit run --all-files
|
||||
uv run pre-commit run --all-files
|
||||
```
|
||||
|
||||
Once installed, pre-commit will automatically run on every commit to check:
|
||||
@@ -82,23 +88,23 @@ def process(items: List[str], config: Optional[Dict[str, int]] = None) -> Tuple[
|
||||
Use [Google-style docstrings](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings) for all public APIs:
|
||||
|
||||
```python
|
||||
def detect_faces(image: np.ndarray, threshold: float = 0.5) -> list[Face]:
|
||||
"""Detect faces in an image.
|
||||
def create_detector(method: str = 'retinaface', **kwargs: Any) -> BaseDetector:
|
||||
"""Factory function to create face detectors.
|
||||
|
||||
Args:
|
||||
image: Input image as a numpy array with shape (H, W, C) in BGR format.
|
||||
threshold: Confidence threshold for filtering detections. Defaults to 0.5.
|
||||
method: Detection method. Options: 'retinaface', 'scrfd', 'yolov5face', 'yolov8face'.
|
||||
**kwargs: Detector-specific parameters.
|
||||
|
||||
Returns:
|
||||
List of Face objects containing bounding boxes, confidence scores,
|
||||
and facial landmarks.
|
||||
Initialized detector instance.
|
||||
|
||||
Raises:
|
||||
ValueError: If the input image has invalid dimensions.
|
||||
ValueError: If method is not supported.
|
||||
|
||||
Example:
|
||||
>>> from uniface import detect_faces
|
||||
>>> faces = detect_faces(image, threshold=0.8)
|
||||
>>> from uniface import create_detector
|
||||
>>> detector = create_detector('retinaface', confidence_threshold=0.8)
|
||||
>>> faces = detector.detect(image)
|
||||
>>> print(f"Found {len(faces)} faces")
|
||||
"""
|
||||
```
|
||||
@@ -174,16 +180,58 @@ When adding a new model or feature:
|
||||
|
||||
Example notebooks demonstrating library usage:
|
||||
|
||||
| Example | Notebook |
|
||||
|---------|----------|
|
||||
| Face Detection | [01_face_detection.ipynb](examples/01_face_detection.ipynb) |
|
||||
| Face Alignment | [02_face_alignment.ipynb](examples/02_face_alignment.ipynb) |
|
||||
| Face Verification | [03_face_verification.ipynb](examples/03_face_verification.ipynb) |
|
||||
| Face Search | [04_face_search.ipynb](examples/04_face_search.ipynb) |
|
||||
| Face Analyzer | [05_face_analyzer.ipynb](examples/05_face_analyzer.ipynb) |
|
||||
| Face Parsing | [06_face_parsing.ipynb](examples/06_face_parsing.ipynb) |
|
||||
| Example | Notebook |
|
||||
| ------------------ | ------------------------------------------------------------------- |
|
||||
| Face Detection | [01_face_detection.ipynb](examples/01_face_detection.ipynb) |
|
||||
| Face Alignment | [02_face_alignment.ipynb](examples/02_face_alignment.ipynb) |
|
||||
| Face Verification | [03_face_verification.ipynb](examples/03_face_verification.ipynb) |
|
||||
| Face Search | [04_face_search.ipynb](examples/04_face_search.ipynb) |
|
||||
| Face Analyzer | [05_face_analyzer.ipynb](examples/05_face_analyzer.ipynb) |
|
||||
| Face Parsing | [06_face_parsing.ipynb](examples/06_face_parsing.ipynb) |
|
||||
| Face Anonymization | [07_face_anonymization.ipynb](examples/07_face_anonymization.ipynb) |
|
||||
| Gaze Estimation | [08_gaze_estimation.ipynb](examples/08_gaze_estimation.ipynb) |
|
||||
| Gaze Estimation | [08_gaze_estimation.ipynb](examples/08_gaze_estimation.ipynb) |
|
||||
| Face Segmentation | [09_face_segmentation.ipynb](examples/09_face_segmentation.ipynb) |
|
||||
| Face Vector Store | [10_face_vector_store.ipynb](examples/10_face_vector_store.ipynb) |
|
||||
| Head Pose Estimation | [11_head_pose_estimation.ipynb](examples/11_head_pose_estimation.ipynb) |
|
||||
|
||||
## Release Process
|
||||
|
||||
Releases are fully automated via GitHub Actions. Only maintainers with branch-protection bypass privileges on `main` can trigger a release.
|
||||
|
||||
### Cutting a release
|
||||
|
||||
1. Go to **Actions → Release Pipeline → Run workflow** on GitHub.
|
||||
2. Enter the version following [PEP 440](https://peps.python.org/pep-0440/):
|
||||
- Stable: `0.7.0`, `1.0.0`
|
||||
- Pre-release: `0.7.0rc1`, `0.7.0b1`, `0.7.0a1`, `0.7.0.dev1`
|
||||
3. Click **Run workflow**.
|
||||
|
||||
### What happens automatically
|
||||
|
||||
The `Release Pipeline` workflow runs all stages in sequence:
|
||||
|
||||
1. **Validate** — checks the version string against PEP 440 and confirms the tag does not already exist.
|
||||
2. **Test** — runs the test suite on Python 3.10–3.14.
|
||||
3. **Release** — updates `pyproject.toml` and `uniface/__init__.py`, commits `chore: Release vX.Y.Z` to `main`, creates and pushes tag `vX.Y.Z`.
|
||||
4. **Publish** — builds the package, uploads to PyPI, and creates a GitHub Release (flagged as pre-release for `a`/`b`/`rc`/`.dev` versions).
|
||||
5. **Deploy docs** — runs only for **stable** versions. Pre-releases do not update the live documentation site.
|
||||
|
||||
### Verifying a release
|
||||
|
||||
- PyPI: <https://pypi.org/project/uniface/>
|
||||
- GitHub Releases: <https://github.com/yakhyo/uniface/releases>
|
||||
- Docs (stable only): <https://yakhyo.github.io/uniface/>
|
||||
|
||||
### Installing a pre-release
|
||||
|
||||
End users can opt in to pre-releases with the `--pre` flag:
|
||||
|
||||
```bash
|
||||
pip install uniface --pre # latest pre-release
|
||||
pip install uniface==0.7.0rc1 # specific pre-release
|
||||
```
|
||||
|
||||
Without `--pre`, `pip install uniface` always resolves to the latest stable version.
|
||||
|
||||
## Questions?
|
||||
|
||||
|
||||
290
README.md
@@ -1,67 +1,149 @@
|
||||
# UniFace: All-in-One Face Analysis Library
|
||||
<h1 align="center">UniFace: A Unified Face Analysis Library for Python</h1>
|
||||
|
||||
<div align="center">
|
||||
|
||||
[](https://pypi.org/project/uniface/)
|
||||
[](https://www.python.org/)
|
||||
[](https://pypi.org/project/uniface/)
|
||||
[](https://www.python.org/)
|
||||
[](https://opensource.org/licenses/MIT)
|
||||
[](https://github.com/yakhyo/uniface/actions)
|
||||
[](https://github.com/yakhyo/uniface/actions)
|
||||
[](https://pepy.tech/projects/uniface)
|
||||
[](https://yakhyo.github.io/uniface/)
|
||||
[](https://yakhyo.github.io/uniface/)
|
||||
[](https://www.kaggle.com/yakhyokhuja/code)
|
||||
[](https://discord.gg/wdzrjr7R5j)
|
||||
|
||||
</div>
|
||||
|
||||
<div align="center">
|
||||
<img src=".github/logos/logo_web.webp" width=80%>
|
||||
<img src="https://raw.githubusercontent.com/yakhyo/uniface/main/.github/logos/uniface_rounded_q80.webp" width="90%" alt="UniFace - A Unified Face Analysis Library for Python">
|
||||
</div>
|
||||
|
||||
**UniFace** is a lightweight, production-ready face analysis library built on ONNX Runtime. It provides high-performance face detection, recognition, landmark detection, face parsing, gaze estimation, and attribute analysis with hardware acceleration support across platforms.
|
||||
---
|
||||
|
||||
> 💬 **Have questions?** [Chat with this codebase on DeepWiki](https://deepwiki.com/yakhyo/uniface) - AI-powered docs that let you ask anything about UniFace.
|
||||
**UniFace** is a lightweight, production-ready Python library for face detection, recognition, tracking, landmark analysis, face parsing, gaze estimation, and face attributes.
|
||||
|
||||
---
|
||||
|
||||
## Features
|
||||
|
||||
- **Face Detection** — RetinaFace, SCRFD, YOLOv5-Face, and YOLOv8-Face with 5-point landmarks
|
||||
- **Face Recognition** — ArcFace, MobileFace, and SphereFace embeddings
|
||||
- **Facial Landmarks** — 106-point landmark localization
|
||||
- **Face Parsing** — BiSeNet semantic segmentation (19 classes)
|
||||
- **Face Recognition** — AdaFace, ArcFace, EdgeFace, MobileFace, and SphereFace embeddings
|
||||
- **Face Tracking** — Multi-object tracking with [BYTETracker](https://github.com/yakhyo/bytetrack-tracker) for persistent IDs across video frames
|
||||
- **Facial Landmarks** — 106-point (2d106det) and 98 / 68-point (PIPNet) landmark localization (separate from the 5-point detector landmarks)
|
||||
- **Face Parsing** — BiSeNet semantic segmentation (19 classes), XSeg face masking
|
||||
- **Portrait Matting** — Trimap-free alpha matte with MODNet (background removal, green screen, compositing)
|
||||
- **Gaze Estimation** — Real-time gaze direction with MobileGaze
|
||||
- **Head Pose Estimation** — 3D head orientation (pitch, yaw, roll) with 6D rotation representation
|
||||
- **Attribute Analysis** — Age, gender, race (FairFace), and emotion
|
||||
- **Vector Store** — FAISS-backed embedding store for fast multi-identity search
|
||||
- **Anti-Spoofing** — Face liveness detection with MiniFASNet
|
||||
- **Face Anonymization** — 5 blur methods for privacy protection
|
||||
- **Hardware Acceleration** — ARM64 (Apple Silicon), CUDA (NVIDIA), CPU
|
||||
|
||||
---
|
||||
|
||||
## Visual Examples
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<td align="center"><b>Face Detection</b><br><img src="https://raw.githubusercontent.com/yakhyo/uniface/main/assets/demos/detection.jpg" width="100%"></td>
|
||||
<td align="center"><b>Gaze Estimation</b><br><img src="https://raw.githubusercontent.com/yakhyo/uniface/main/assets/demos/gaze.jpg" width="100%"></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td align="center"><b>Head Pose Estimation</b><br><img src="https://raw.githubusercontent.com/yakhyo/uniface/main/assets/demos/headpose.jpg" width="100%"></td>
|
||||
<td align="center"><b>Age & Gender</b><br><img src="https://raw.githubusercontent.com/yakhyo/uniface/main/assets/demos/age_gender.jpg" width="100%"></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td align="center" colspan="2"><b>Face Verification</b><br><img src="https://raw.githubusercontent.com/yakhyo/uniface/main/assets/demos/verification.jpg" width="80%"></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td align="center" colspan="2"><b>106-Point Landmarks</b><br><img src="https://raw.githubusercontent.com/yakhyo/uniface/main/assets/demos/landmarks.jpg" width="36%"></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td align="center" colspan="2"><b>Face Parsing</b><br><img src="https://raw.githubusercontent.com/yakhyo/uniface/main/assets/demos/parsing.jpg" width="80%"></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td align="center" colspan="2"><b>Face Segmentation</b><br><img src="https://raw.githubusercontent.com/yakhyo/uniface/main/assets/demos/segmentation.jpg" width="80%"></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td align="center" colspan="2"><b>Portrait Matting</b><br><img src="https://raw.githubusercontent.com/yakhyo/uniface/main/assets/demos/matting.jpg" width="100%"></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td align="center" colspan="2"><b>Face Anonymization</b><br><img src="https://raw.githubusercontent.com/yakhyo/uniface/main/assets/demos/anonymization.jpg" width="100%"></td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
---
|
||||
|
||||
## Installation
|
||||
|
||||
**CPU / Apple Silicon**
|
||||
|
||||
```bash
|
||||
# Standard installation
|
||||
pip install uniface
|
||||
pip install uniface[cpu]
|
||||
```
|
||||
|
||||
# GPU support (CUDA)
|
||||
**GPU support (NVIDIA CUDA)**
|
||||
|
||||
```bash
|
||||
pip install uniface[gpu]
|
||||
```
|
||||
|
||||
# From source
|
||||
> **Why separate extras?** `onnxruntime` and `onnxruntime-gpu` conflict when both are installed — they own the same Python namespace. Installing only the extra you need prevents that conflict entirely.
|
||||
|
||||
**From source (latest version)**
|
||||
|
||||
```bash
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface && pip install -e .
|
||||
cd uniface && pip install -e ".[cpu]" # or .[gpu] for CUDA
|
||||
```
|
||||
|
||||
**FAISS vector store**
|
||||
|
||||
```bash
|
||||
pip install faiss-cpu # or faiss-gpu for CUDA
|
||||
```
|
||||
|
||||
**Optional dependencies**
|
||||
- Emotion model uses TorchScript and requires `torch`:
|
||||
`pip install torch` (choose the correct build for your OS/CUDA)
|
||||
- YOLOv5-Face and YOLOv8-Face support faster NMS with `torchvision`:
|
||||
`pip install torch torchvision` then use `nms_mode='torchvision'`
|
||||
|
||||
---
|
||||
|
||||
## Model Downloads and Cache
|
||||
|
||||
Models are downloaded automatically on first use and verified via SHA-256.
|
||||
|
||||
Default cache location: `~/.uniface/models`
|
||||
|
||||
Override with the programmatic API or environment variable:
|
||||
|
||||
```python
|
||||
from uniface.model_store import get_cache_dir, set_cache_dir
|
||||
|
||||
set_cache_dir('/data/models')
|
||||
print(get_cache_dir()) # /data/models
|
||||
```
|
||||
|
||||
```bash
|
||||
export UNIFACE_CACHE_DIR=/data/models
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Quick Example
|
||||
## Quick Example (Detection)
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
# Initialize detector (models auto-download on first use)
|
||||
detector = RetinaFace()
|
||||
|
||||
# Detect faces
|
||||
image = cv2.imread("photo.jpg")
|
||||
if image is None:
|
||||
raise ValueError("Failed to load image. Check the path to 'photo.jpg'.")
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
@@ -71,14 +153,94 @@ for face in faces:
|
||||
```
|
||||
|
||||
<div align="center">
|
||||
<img src="assets/test_result.png">
|
||||
<img src="https://raw.githubusercontent.com/yakhyo/uniface/main/assets/test_result.png" width="90%">
|
||||
<p>Face Detection Model Output</p>
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## Example (Face Analyzer)
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import FaceAnalyzer
|
||||
|
||||
# Zero-config: uses SCRFD (500M) + ArcFace (MobileNet) by default
|
||||
analyzer = FaceAnalyzer()
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
if image is None:
|
||||
raise ValueError("Failed to load image. Check the path to 'photo.jpg'.")
|
||||
|
||||
faces = analyzer.analyze(image)
|
||||
|
||||
for face in faces:
|
||||
print(face.bbox, face.embedding.shape if face.embedding is not None else None)
|
||||
```
|
||||
|
||||
With attributes:
|
||||
|
||||
```python
|
||||
from uniface import FaceAnalyzer, AgeGender
|
||||
|
||||
analyzer = FaceAnalyzer(attributes=[AgeGender()])
|
||||
faces = analyzer.analyze(image)
|
||||
|
||||
for face in faces:
|
||||
print(f"{face.sex}, {face.age}y, embedding={face.embedding.shape}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Example (Portrait Matting)
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface.matting import MODNet
|
||||
|
||||
matting = MODNet()
|
||||
|
||||
image = cv2.imread("portrait.jpg")
|
||||
matte = matting.predict(image) # (H, W) float32 in [0, 1]
|
||||
|
||||
# Transparent PNG
|
||||
rgba = cv2.cvtColor(image, cv2.COLOR_BGR2BGRA)
|
||||
rgba[:, :, 3] = (matte * 255).astype(np.uint8)
|
||||
cv2.imwrite("transparent.png", rgba)
|
||||
|
||||
# Green screen
|
||||
matte_3ch = matte[:, :, np.newaxis]
|
||||
bg = np.full_like(image, (0, 177, 64), dtype=np.uint8)
|
||||
result = (image * matte_3ch + bg * (1 - matte_3ch)).astype(np.uint8)
|
||||
cv2.imwrite("green_screen.jpg", result)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Jupyter Notebooks
|
||||
|
||||
| Example | Colab | Description |
|
||||
|---------|:-----:|-------------|
|
||||
| [01_face_detection.ipynb](examples/01_face_detection.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/01_face_detection.ipynb) | Face detection and landmarks |
|
||||
| [02_face_alignment.ipynb](examples/02_face_alignment.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/02_face_alignment.ipynb) | Face alignment for recognition |
|
||||
| [03_face_verification.ipynb](examples/03_face_verification.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/03_face_verification.ipynb) | Compare faces for identity |
|
||||
| [04_face_search.ipynb](examples/04_face_search.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/04_face_search.ipynb) | Find a person in group photos |
|
||||
| [05_face_analyzer.ipynb](examples/05_face_analyzer.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/05_face_analyzer.ipynb) | Unified face analysis |
|
||||
| [06_face_parsing.ipynb](examples/06_face_parsing.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/06_face_parsing.ipynb) | Semantic face segmentation |
|
||||
| [07_face_anonymization.ipynb](examples/07_face_anonymization.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/07_face_anonymization.ipynb) | Privacy-preserving blur |
|
||||
| [08_gaze_estimation.ipynb](examples/08_gaze_estimation.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/08_gaze_estimation.ipynb) | Gaze direction estimation |
|
||||
| [09_face_segmentation.ipynb](examples/09_face_segmentation.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/09_face_segmentation.ipynb) | Face segmentation with XSeg |
|
||||
| [10_face_vector_store.ipynb](examples/10_face_vector_store.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/10_face_vector_store.ipynb) | FAISS-backed face database |
|
||||
| [11_head_pose_estimation.ipynb](examples/11_head_pose_estimation.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/11_head_pose_estimation.ipynb) | Head pose estimation (pitch, yaw, roll) |
|
||||
| [12_face_recognition.ipynb](examples/12_face_recognition.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/12_face_recognition.ipynb) | Standalone face recognition pipeline |
|
||||
| [13_portrait_matting.ipynb](examples/13_portrait_matting.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/13_portrait_matting.ipynb) | Portrait matting with MODNet |
|
||||
|
||||
---
|
||||
|
||||
## Documentation
|
||||
|
||||
📚 **Full documentation**: [yakhyo.github.io/uniface](https://yakhyo.github.io/uniface/)
|
||||
Full documentation: https://yakhyo.github.io/uniface/
|
||||
|
||||
| Resource | Description |
|
||||
|----------|-------------|
|
||||
@@ -87,19 +249,53 @@ for face in faces:
|
||||
| [API Reference](https://yakhyo.github.io/uniface/modules/detection/) | Detailed module documentation |
|
||||
| [Tutorials](https://yakhyo.github.io/uniface/recipes/image-pipeline/) | Step-by-step workflow examples |
|
||||
| [Guides](https://yakhyo.github.io/uniface/concepts/overview/) | Architecture and design principles |
|
||||
| [Datasets](https://yakhyo.github.io/uniface/datasets/) | Training data and evaluation benchmarks |
|
||||
|
||||
### Jupyter Notebooks
|
||||
---
|
||||
|
||||
| Example | Colab | Description |
|
||||
|---------|:-----:|-------------|
|
||||
| [01_face_detection.ipynb](examples/01_face_detection.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/01_face_detection.ipynb) | Face detection and landmarks |
|
||||
| [02_face_alignment.ipynb](examples/02_face_alignment.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/02_face_alignment.ipynb) | Face alignment for recognition |
|
||||
| [03_face_verification.ipynb](examples/03_face_verification.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/03_face_verification.ipynb) | Compare faces for identity |
|
||||
| [04_face_search.ipynb](examples/04_face_search.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/04_face_search.ipynb) | Find a person in group photos |
|
||||
| [05_face_analyzer.ipynb](examples/05_face_analyzer.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/05_face_analyzer.ipynb) | All-in-one analysis |
|
||||
| [06_face_parsing.ipynb](examples/06_face_parsing.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/06_face_parsing.ipynb) | Semantic face segmentation |
|
||||
| [07_face_anonymization.ipynb](examples/07_face_anonymization.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/07_face_anonymization.ipynb) | Privacy-preserving blur |
|
||||
| [08_gaze_estimation.ipynb](examples/08_gaze_estimation.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/08_gaze_estimation.ipynb) | Gaze direction estimation |
|
||||
## Execution Providers (ONNX Runtime)
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
# Force CPU-only inference
|
||||
detector = RetinaFace(providers=["CPUExecutionProvider"])
|
||||
```
|
||||
|
||||
See the [execution providers documentation](https://yakhyo.github.io/uniface/concepts/execution-providers/) for more details.
|
||||
|
||||
---
|
||||
|
||||
## Datasets
|
||||
|
||||
| Task | Training Dataset | Models |
|
||||
|------|-----------------|--------|
|
||||
| Detection | WIDER FACE | RetinaFace, SCRFD, YOLOv5-Face, YOLOv8-Face |
|
||||
| Recognition | MS1MV2 | MobileFace, SphereFace |
|
||||
| Recognition | WebFace600K | ArcFace |
|
||||
| Recognition | WebFace4M / 12M | AdaFace |
|
||||
| Recognition | MS1MV2 | EdgeFace |
|
||||
| Landmarks | WFLW, 300W+CelebA | PIPNet (98 / 68 pts) |
|
||||
| Gaze | Gaze360 | MobileGaze |
|
||||
| Head Pose | 300W-LP | HeadPose (ResNet, MobileNet) |
|
||||
| Parsing | CelebAMask-HQ | BiSeNet |
|
||||
| Attributes | CelebA, FairFace, AffectNet | AgeGender, FairFace, Emotion |
|
||||
|
||||
> See [Datasets documentation](https://yakhyo.github.io/uniface/datasets/) for download links, benchmarks, and details.
|
||||
|
||||
---
|
||||
|
||||
## Licensing and Model Usage
|
||||
|
||||
UniFace is MIT-licensed, but several pretrained models carry their own licenses.
|
||||
Review: https://yakhyo.github.io/uniface/license-attribution/
|
||||
|
||||
Notable examples:
|
||||
- YOLOv5-Face and YOLOv8-Face weights are GPL-3.0
|
||||
- FairFace weights are CC BY 4.0
|
||||
|
||||
If you plan commercial use, verify model license compatibility.
|
||||
|
||||
---
|
||||
|
||||
@@ -107,12 +303,18 @@ for face in faces:
|
||||
|
||||
| Feature | Repository | Training | Description |
|
||||
|---------|------------|:--------:|-------------|
|
||||
| Detection | [retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) | [x] | RetinaFace PyTorch Training & Export |
|
||||
| Detection | [retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) | ✓ | RetinaFace PyTorch Training & Export |
|
||||
| Detection | [yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference) | - | YOLOv5-Face ONNX Inference |
|
||||
| Detection | [yolov8-face-onnx-inference](https://github.com/yakhyo/yolov8-face-onnx-inference) | - | YOLOv8-Face ONNX Inference |
|
||||
| Recognition | [face-recognition](https://github.com/yakhyo/face-recognition) | [x] | MobileFace, SphereFace Training |
|
||||
| Parsing | [face-parsing](https://github.com/yakhyo/face-parsing) | [x] | BiSeNet Face Parsing |
|
||||
| Gaze | [gaze-estimation](https://github.com/yakhyo/gaze-estimation) | [x] | MobileGaze Training |
|
||||
| Tracking | [bytetrack-tracker](https://github.com/yakhyo/bytetrack-tracker) | - | BYTETracker Multi-Object Tracking |
|
||||
| Recognition | [face-recognition](https://github.com/yakhyo/face-recognition) | ✓ | MobileFace, SphereFace Training |
|
||||
| Recognition | [edgeface-onnx](https://github.com/yakhyo/edgeface-onnx) | - | EdgeFace ONNX Inference |
|
||||
| Landmarks | [pipnet-onnx](https://github.com/yakhyo/pipnet-onnx) | - | PIPNet 98 / 68-point ONNX Inference |
|
||||
| Parsing | [face-parsing](https://github.com/yakhyo/face-parsing) | ✓ | BiSeNet Face Parsing |
|
||||
| Parsing | [face-segmentation](https://github.com/yakhyo/face-segmentation) | - | XSeg Face Segmentation |
|
||||
| Gaze | [gaze-estimation](https://github.com/yakhyo/gaze-estimation) | ✓ | MobileGaze Training |
|
||||
| Head Pose | [head-pose-estimation](https://github.com/yakhyo/head-pose-estimation) | ✓ | Head Pose Training (6DRepNet-style) |
|
||||
| Matting | [modnet](https://github.com/yakhyo/modnet) | - | MODNet Portrait Matting |
|
||||
| Anti-Spoofing | [face-anti-spoofing](https://github.com/yakhyo/face-anti-spoofing) | - | MiniFASNet Inference |
|
||||
| Attributes | [fairface-onnx](https://github.com/yakhyo/fairface-onnx) | - | FairFace ONNX Inference |
|
||||
|
||||
@@ -122,8 +324,20 @@ for face in faces:
|
||||
|
||||
## Contributing
|
||||
|
||||
Contributions are welcome! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
|
||||
Contributions are welcome. Please see [CONTRIBUTING.md](CONTRIBUTING.md).
|
||||
|
||||
## Support
|
||||
|
||||
If you find this project useful, consider giving it a ⭐ on GitHub — it helps others discover it!
|
||||
|
||||
Questions or feedback:
|
||||
- Discord: https://discord.gg/wdzrjr7R5j
|
||||
- GitHub Issues: https://github.com/yakhyo/uniface/issues
|
||||
- DeepWiki Q&A: https://deepwiki.com/yakhyo/uniface
|
||||
|
||||
## License
|
||||
|
||||
This project is licensed under the [MIT License](LICENSE).
|
||||
|
||||
> **Disclaimer:** This project is not affiliated with or related to
|
||||
> [Uniface](https://uniface.com/) by Rocket Software.
|
||||
|
||||
BIN
assets/demos/age_gender.jpg
Normal file
|
After Width: | Height: | Size: 206 KiB |
BIN
assets/demos/anonymization.jpg
Normal file
|
After Width: | Height: | Size: 1.5 MiB |
BIN
assets/demos/detection.jpg
Normal file
|
After Width: | Height: | Size: 341 KiB |
BIN
assets/demos/gaze.jpg
Normal file
|
After Width: | Height: | Size: 212 KiB |
BIN
assets/demos/headpose.jpg
Normal file
|
After Width: | Height: | Size: 233 KiB |
BIN
assets/demos/landmarks.jpg
Normal file
|
After Width: | Height: | Size: 428 KiB |
BIN
assets/demos/matting.jpg
Normal file
|
After Width: | Height: | Size: 938 KiB |
BIN
assets/demos/parsing.jpg
Normal file
|
After Width: | Height: | Size: 712 KiB |
BIN
assets/demos/segmentation.jpg
Normal file
|
After Width: | Height: | Size: 851 KiB |
BIN
assets/demos/src_friends.jpg
Normal file
|
After Width: | Height: | Size: 171 KiB |
BIN
assets/demos/src_man1.jpg
Normal file
|
After Width: | Height: | Size: 63 KiB |
BIN
assets/demos/src_man2.jpg
Normal file
|
After Width: | Height: | Size: 220 KiB |
BIN
assets/demos/src_man3.jpg
Normal file
|
After Width: | Height: | Size: 146 KiB |
BIN
assets/demos/src_meeting.jpg
Normal file
|
After Width: | Height: | Size: 96 KiB |
BIN
assets/demos/src_portrait1.jpg
Normal file
|
After Width: | Height: | Size: 208 KiB |
BIN
assets/demos/verification.jpg
Normal file
|
After Width: | Height: | Size: 121 KiB |
BIN
assets/einstein/img_0.png
Normal file
|
After Width: | Height: | Size: 99 KiB |
BIN
assets/test_images/image5.jpg
Normal file
|
After Width: | Height: | Size: 5.8 KiB |
@@ -93,7 +93,7 @@ landmarks = face.landmarks # Shape: (5, 2)
|
||||
Returned by `Landmark106`:
|
||||
|
||||
```python
|
||||
from uniface import Landmark106
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
landmarker = Landmark106()
|
||||
landmarks = landmarker.get_landmarks(image, face.bbox)
|
||||
@@ -110,6 +110,28 @@ landmarks = landmarker.get_landmarks(image, face.bbox)
|
||||
| 63-86 | Eyes | 24 |
|
||||
| 87-105 | Mouth | 19 |
|
||||
|
||||
### 98 / 68-Point Landmarks (PIPNet)
|
||||
|
||||
Returned by `PIPNet`. The variant determines the layout:
|
||||
|
||||
```python
|
||||
from uniface.constants import PIPNetWeights
|
||||
from uniface.landmark import PIPNet
|
||||
|
||||
# 98-point WFLW layout (default)
|
||||
landmarks = PIPNet().get_landmarks(image, face.bbox)
|
||||
# Shape: (98, 2)
|
||||
|
||||
# 68-point 300W layout
|
||||
landmarks = PIPNet(model_name=PIPNetWeights.DW300_CELEBA_68).get_landmarks(image, face.bbox)
|
||||
# Shape: (68, 2)
|
||||
```
|
||||
|
||||
The 98-point output follows the standard [WFLW](https://wywu.github.io/projects/LAB/WFLW.html) layout
|
||||
(33 face-contour points, eyebrow/eye/nose/mouth groups). The 68-point output follows the standard
|
||||
[300W / iBUG](https://ibug.doc.ic.ac.uk/resources/300-W/) layout. Coordinates are in original-image
|
||||
pixel space, identical in convention to `Landmark106`.
|
||||
|
||||
---
|
||||
|
||||
## Face Crop
|
||||
@@ -174,7 +196,7 @@ yaw = -90° ────┼──── yaw = +90°
|
||||
Face alignment uses 5-point landmarks to normalize face orientation:
|
||||
|
||||
```python
|
||||
from uniface import face_alignment
|
||||
from uniface.face_utils import face_alignment
|
||||
|
||||
# Align face to standard template
|
||||
aligned_face = face_alignment(image, face.landmarks)
|
||||
|
||||
@@ -9,7 +9,7 @@ UniFace uses ONNX Runtime for model inference, which supports multiple hardware
|
||||
UniFace automatically selects the optimal execution provider based on available hardware:
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
# Automatically uses best available provider
|
||||
detector = RetinaFace()
|
||||
@@ -17,8 +17,8 @@ detector = RetinaFace()
|
||||
|
||||
**Priority order:**
|
||||
|
||||
1. **CUDAExecutionProvider** - NVIDIA GPU
|
||||
2. **CoreMLExecutionProvider** - Apple Silicon
|
||||
1. **CoreMLExecutionProvider** - Apple Silicon
|
||||
2. **CUDAExecutionProvider** - NVIDIA GPU
|
||||
3. **CPUExecutionProvider** - Fallback
|
||||
|
||||
---
|
||||
@@ -28,7 +28,8 @@ detector = RetinaFace()
|
||||
You can specify which execution provider to use by passing the `providers` parameter:
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, ArcFace
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
|
||||
# Force CPU execution (even if GPU is available)
|
||||
detector = RetinaFace(providers=['CPUExecutionProvider'])
|
||||
@@ -38,16 +39,20 @@ recognizer = ArcFace(providers=['CPUExecutionProvider'])
|
||||
detector = RetinaFace(providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
|
||||
```
|
||||
|
||||
All model classes accept the `providers` parameter:
|
||||
All **ONNX-based** model classes accept the `providers` parameter:
|
||||
|
||||
- Detection: `RetinaFace`, `SCRFD`, `YOLOv5Face`, `YOLOv8Face`
|
||||
- Recognition: `ArcFace`, `AdaFace`, `MobileFace`, `SphereFace`
|
||||
- Landmarks: `Landmark106`
|
||||
- Landmarks: `Landmark106`, `PIPNet`
|
||||
- Gaze: `MobileGaze`
|
||||
- Parsing: `BiSeNet`
|
||||
- Parsing: `BiSeNet`, `XSeg`
|
||||
- Attributes: `AgeGender`, `FairFace`
|
||||
- Anti-Spoofing: `MiniFASNet`
|
||||
|
||||
!!! note "Non-ONNX components"
|
||||
- **Emotion** uses TorchScript and selects its device automatically (`mps` / `cuda` / `cpu`). It does **not** accept the `providers` parameter.
|
||||
- **BlurFace** is a pure OpenCV utility and does not load any model.
|
||||
|
||||
---
|
||||
|
||||
## Check Available Providers
|
||||
@@ -88,7 +93,7 @@ print("Available providers:", providers)
|
||||
No additional setup required. ARM64 optimizations are built into `onnxruntime`:
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
pip install uniface[cpu]
|
||||
```
|
||||
|
||||
Verify ARM64:
|
||||
@@ -105,7 +110,7 @@ python -c "import platform; print(platform.machine())"
|
||||
|
||||
### NVIDIA GPU (CUDA)
|
||||
|
||||
Install with GPU support:
|
||||
Install with GPU support (this installs `onnxruntime-gpu`, which already includes CPU fallback):
|
||||
|
||||
```bash
|
||||
pip install uniface[gpu]
|
||||
@@ -135,7 +140,7 @@ else:
|
||||
CPU execution is always available:
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
pip install uniface[cpu]
|
||||
```
|
||||
|
||||
Works on all platforms without additional configuration.
|
||||
@@ -174,7 +179,7 @@ pip install uniface[gpu]
|
||||
Smaller input sizes are faster but may reduce accuracy:
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
# Faster, lower accuracy
|
||||
detector = RetinaFace(input_size=(320, 320))
|
||||
@@ -210,7 +215,7 @@ for image_path in image_paths:
|
||||
|
||||
3. Reinstall with GPU support:
|
||||
```bash
|
||||
pip uninstall onnxruntime onnxruntime-gpu
|
||||
pip uninstall onnxruntime onnxruntime-gpu -y
|
||||
pip install uniface[gpu]
|
||||
```
|
||||
|
||||
|
||||
@@ -43,7 +43,7 @@ class Face:
|
||||
# Required (from detection)
|
||||
bbox: np.ndarray # [x1, y1, x2, y2]
|
||||
confidence: float # 0.0 to 1.0
|
||||
landmarks: np.ndarray # (5, 2) or (106, 2)
|
||||
landmarks: np.ndarray # (5, 2) from detectors. Dense landmarkers return (106, 2), (98, 2), or (68, 2).
|
||||
|
||||
# Optional (enriched by analyzers)
|
||||
embedding: np.ndarray | None = None
|
||||
@@ -53,6 +53,7 @@ class Face:
|
||||
race: str | None = None # "East Asian", etc.
|
||||
emotion: str | None = None # "Happy", etc.
|
||||
emotion_confidence: float | None = None
|
||||
track_id: int | None = None # Persistent ID from tracker
|
||||
```
|
||||
|
||||
### Properties
|
||||
@@ -105,6 +106,27 @@ print(f"Yaw: {np.degrees(result.yaw):.1f}°")
|
||||
|
||||
---
|
||||
|
||||
### HeadPoseResult
|
||||
|
||||
```python
|
||||
@dataclass(frozen=True)
|
||||
class HeadPoseResult:
|
||||
pitch: float # Rotation around X-axis (degrees), + = looking down
|
||||
yaw: float # Rotation around Y-axis (degrees), + = looking right
|
||||
roll: float # Rotation around Z-axis (degrees), + = tilting clockwise
|
||||
```
|
||||
|
||||
**Usage:**
|
||||
|
||||
```python
|
||||
result = head_pose.estimate(face_crop)
|
||||
print(f"Pitch: {result.pitch:.1f}°")
|
||||
print(f"Yaw: {result.yaw:.1f}°")
|
||||
print(f"Roll: {result.roll:.1f}°")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### SpoofingResult
|
||||
|
||||
```python
|
||||
@@ -143,11 +165,11 @@ class AttributeResult:
|
||||
|
||||
```python
|
||||
# AgeGender model
|
||||
result = age_gender.predict(image, face.bbox)
|
||||
result = age_gender.predict(image, face)
|
||||
print(f"{result.sex}, {result.age} years old")
|
||||
|
||||
# FairFace model
|
||||
result = fairface.predict(image, face.bbox)
|
||||
result = fairface.predict(image, face)
|
||||
print(f"{result.sex}, {result.age_group}, {result.race}")
|
||||
```
|
||||
|
||||
@@ -170,14 +192,14 @@ Face recognition models return normalized 512-dimensional embeddings:
|
||||
|
||||
```python
|
||||
embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
print(f"Shape: {embedding.shape}") # (1, 512)
|
||||
print(f"Shape: {embedding.shape}") # (512,)
|
||||
print(f"Norm: {np.linalg.norm(embedding):.4f}") # ~1.0
|
||||
```
|
||||
|
||||
### Similarity Computation
|
||||
|
||||
```python
|
||||
from uniface import compute_similarity
|
||||
from uniface.face_utils import compute_similarity
|
||||
|
||||
similarity = compute_similarity(embedding1, embedding2)
|
||||
# Returns: float between -1 and 1 (cosine similarity)
|
||||
@@ -199,16 +221,16 @@ print(f"Classes: {np.unique(mask)}") # [0, 1, 2, ...]
|
||||
|
||||
| ID | Class | ID | Class |
|
||||
|----|-------|----|-------|
|
||||
| 0 | Background | 10 | Ear Ring |
|
||||
| 1 | Skin | 11 | Nose |
|
||||
| 2 | Left Eyebrow | 12 | Mouth |
|
||||
| 3 | Right Eyebrow | 13 | Upper Lip |
|
||||
| 4 | Left Eye | 14 | Lower Lip |
|
||||
| 5 | Right Eye | 15 | Neck |
|
||||
| 6 | Eye Glasses | 16 | Neck Lace |
|
||||
| 7 | Left Ear | 17 | Cloth |
|
||||
| 8 | Right Ear | 18 | Hair |
|
||||
| 9 | Hat | | |
|
||||
| 0 | Background | 10 | Nose |
|
||||
| 1 | Skin | 11 | Mouth |
|
||||
| 2 | Left Eyebrow | 12 | Upper Lip |
|
||||
| 3 | Right Eyebrow | 13 | Lower Lip |
|
||||
| 4 | Left Eye | 14 | Neck |
|
||||
| 5 | Right Eye | 15 | Necklace |
|
||||
| 6 | Eyeglasses | 16 | Cloth |
|
||||
| 7 | Left Ear | 17 | Hair |
|
||||
| 8 | Right Ear | 18 | Hat |
|
||||
| 9 | Earring | | |
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ UniFace automatically downloads and caches models. This page explains how model
|
||||
Models are downloaded on first use:
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
# First run: downloads model to cache
|
||||
detector = RetinaFace() # ~3.5 MB download
|
||||
@@ -32,9 +32,9 @@ Default cache directory:
|
||||
|
||||
```
|
||||
~/.uniface/models/
|
||||
├── retinaface_mv2.onnx
|
||||
├── w600k_mbf.onnx
|
||||
├── 2d106det.onnx
|
||||
├── retinaface_mnet_v2.onnx
|
||||
├── arcface_mnet.onnx
|
||||
├── 2d_106.onnx
|
||||
├── gaze_resnet34.onnx
|
||||
├── parsing_resnet18.onnx
|
||||
└── ...
|
||||
@@ -44,44 +44,57 @@ Default cache directory:
|
||||
|
||||
## Custom Cache Directory
|
||||
|
||||
Specify a custom cache location:
|
||||
Use the programmatic API to change the cache location at runtime:
|
||||
|
||||
```python
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
from uniface.model_store import get_cache_dir, set_cache_dir
|
||||
|
||||
# Download to custom directory
|
||||
model_path = verify_model_weights(
|
||||
RetinaFaceWeights.MNET_V2,
|
||||
root='./my_models'
|
||||
)
|
||||
print(f"Model at: {model_path}")
|
||||
# Set a custom cache directory
|
||||
set_cache_dir('/data/models')
|
||||
|
||||
# Verify the current path
|
||||
print(get_cache_dir()) # /data/models
|
||||
|
||||
# All subsequent model loads use the new directory
|
||||
from uniface.detection import RetinaFace
|
||||
detector = RetinaFace() # Downloads to /data/models/
|
||||
```
|
||||
|
||||
Or set the `UNIFACE_CACHE_DIR` environment variable (see [Environment Variables](#environment-variables) below).
|
||||
|
||||
---
|
||||
|
||||
## Pre-Download Models
|
||||
|
||||
Download models before deployment:
|
||||
Download models before deployment using the concurrent downloader:
|
||||
|
||||
```python
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.model_store import download_models
|
||||
from uniface.constants import (
|
||||
RetinaFaceWeights,
|
||||
ArcFaceWeights,
|
||||
AgeGenderWeights,
|
||||
)
|
||||
|
||||
# Download all needed models
|
||||
models = [
|
||||
# Download multiple models concurrently (up to 4 threads by default)
|
||||
paths = download_models([
|
||||
RetinaFaceWeights.MNET_V2,
|
||||
ArcFaceWeights.MNET,
|
||||
AgeGenderWeights.DEFAULT,
|
||||
]
|
||||
])
|
||||
|
||||
for model in models:
|
||||
path = verify_model_weights(model)
|
||||
print(f"Downloaded: {path}")
|
||||
for model, path in paths.items():
|
||||
print(f"{model.value} -> {path}")
|
||||
```
|
||||
|
||||
Or download one at a time:
|
||||
|
||||
```python
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
path = verify_model_weights(RetinaFaceWeights.MNET_V2)
|
||||
print(f"Downloaded: {path}")
|
||||
```
|
||||
|
||||
Or use the CLI tool:
|
||||
@@ -115,11 +128,20 @@ print(f"Copy from: {path}")
|
||||
scp -r ~/.uniface/models/ user@offline-machine:~/.uniface/models/
|
||||
```
|
||||
|
||||
### 3. Use normally
|
||||
### 3. Point to the cache (if non-default location)
|
||||
|
||||
```python
|
||||
from uniface.model_store import set_cache_dir
|
||||
|
||||
# Only needed if the models are not at ~/.uniface/models/
|
||||
set_cache_dir('/path/to/copied/models')
|
||||
```
|
||||
|
||||
### 4. Use normally
|
||||
|
||||
```python
|
||||
# Models load from local cache
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
detector = RetinaFace() # No network required
|
||||
```
|
||||
|
||||
@@ -172,6 +194,8 @@ If a model fails verification, it's re-downloaded automatically.
|
||||
| Model | Size | Download |
|
||||
|-------|------|----------|
|
||||
| Landmark106 | 14 MB | ✅ |
|
||||
| PIPNet WFLW-98 | 47 MB | ✅ |
|
||||
| PIPNet 300W+CelebA-68 | 46 MB | ✅ |
|
||||
| AgeGender | 8 MB | ✅ |
|
||||
| FairFace | 44 MB | ✅ |
|
||||
| Gaze ResNet34 | 82 MB | ✅ |
|
||||
@@ -182,7 +206,12 @@ If a model fails verification, it's re-downloaded automatically.
|
||||
|
||||
## Clear Cache
|
||||
|
||||
Remove cached models:
|
||||
Find and remove cached models:
|
||||
|
||||
```python
|
||||
from uniface.model_store import get_cache_dir
|
||||
print(get_cache_dir()) # shows the active cache path
|
||||
```
|
||||
|
||||
```bash
|
||||
# Remove all cached models
|
||||
@@ -198,20 +227,35 @@ Models will be re-downloaded on next use.
|
||||
|
||||
## Environment Variables
|
||||
|
||||
Set custom cache location via environment variable:
|
||||
There are three equivalent ways to configure the cache directory:
|
||||
|
||||
```bash
|
||||
export UNIFACE_CACHE_DIR=/path/to/custom/cache
|
||||
**1. Programmatic API (recommended)**
|
||||
|
||||
```python
|
||||
from uniface.model_store import get_cache_dir, set_cache_dir
|
||||
|
||||
set_cache_dir('/path/to/custom/cache')
|
||||
print(get_cache_dir()) # /path/to/custom/cache
|
||||
```
|
||||
|
||||
**2. Direct environment variable (Python)**
|
||||
|
||||
```python
|
||||
import os
|
||||
os.environ['UNIFACE_CACHE_DIR'] = '/path/to/custom/cache'
|
||||
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
detector = RetinaFace() # Uses custom cache
|
||||
```
|
||||
|
||||
**3. Shell environment variable**
|
||||
|
||||
```bash
|
||||
export UNIFACE_CACHE_DIR=/path/to/custom/cache
|
||||
```
|
||||
|
||||
All three methods configure the same `UNIFACE_CACHE_DIR` setting under the hood; `get_cache_dir()` always returns the currently resolved cache path, whichever method was used.
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
@@ -23,35 +23,50 @@ graph TB
|
||||
LMK[Landmarks]
|
||||
ATTR[Attributes]
|
||||
GAZE[Gaze]
|
||||
HPOSE[Head Pose]
|
||||
PARSE[Parsing]
|
||||
SPOOF[Anti-Spoofing]
|
||||
MATT[Matting]
|
||||
PRIV[Privacy]
|
||||
end
|
||||
|
||||
subgraph Tracking
|
||||
TRK[BYTETracker]
|
||||
end
|
||||
|
||||
subgraph Stores
|
||||
IDX[FAISS Vector Store]
|
||||
end
|
||||
|
||||
subgraph Output
|
||||
FACE[Face Objects]
|
||||
end
|
||||
|
||||
IMG --> DET
|
||||
IMG --> MATT
|
||||
DET --> REC
|
||||
DET --> LMK
|
||||
DET --> ATTR
|
||||
DET --> GAZE
|
||||
DET --> HPOSE
|
||||
DET --> PARSE
|
||||
DET --> SPOOF
|
||||
DET --> PRIV
|
||||
DET --> TRK
|
||||
REC --> IDX
|
||||
REC --> FACE
|
||||
LMK --> FACE
|
||||
ATTR --> FACE
|
||||
TRK --> FACE
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Design Principles
|
||||
|
||||
### 1. ONNX-First
|
||||
### 1. Cross-Platform Inference
|
||||
|
||||
All models use ONNX Runtime for inference:
|
||||
UniFace uses portable model runtimes to provide consistent inference across macOS, Linux, and Windows. Most core components run through ONNX Runtime, while optional components may use PyTorch where appropriate.
|
||||
|
||||
- **Cross-platform**: Same models work on macOS, Linux, Windows
|
||||
- **Hardware acceleration**: Automatic selection of optimal provider
|
||||
@@ -74,12 +89,14 @@ tqdm # Progress bars
|
||||
Factory functions and direct instantiation:
|
||||
|
||||
```python
|
||||
# Factory function
|
||||
detector = create_detector('retinaface')
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
# Direct instantiation (recommended)
|
||||
from uniface import RetinaFace
|
||||
detector = RetinaFace()
|
||||
|
||||
# Or via factory function
|
||||
from uniface.detection import create_detector
|
||||
|
||||
detector = create_detector('retinaface')
|
||||
```
|
||||
|
||||
### 4. Type Safety
|
||||
@@ -98,18 +115,22 @@ def detect(self, image: np.ndarray) -> list[Face]:
|
||||
```
|
||||
uniface/
|
||||
├── detection/ # Face detection (RetinaFace, SCRFD, YOLOv5Face, YOLOv8Face)
|
||||
├── recognition/ # Face recognition (AdaFace, ArcFace, MobileFace, SphereFace)
|
||||
├── landmark/ # 106-point landmarks
|
||||
├── recognition/ # Face recognition (AdaFace, ArcFace, EdgeFace, MobileFace, SphereFace)
|
||||
├── tracking/ # Multi-object tracking (BYTETracker)
|
||||
├── landmark/ # Dense landmarks (Landmark106 = 106 pts, PIPNet = 98 / 68 pts)
|
||||
├── attribute/ # Age, gender, emotion, race
|
||||
├── parsing/ # Face semantic segmentation
|
||||
├── matting/ # Portrait matting (MODNet)
|
||||
├── gaze/ # Gaze estimation
|
||||
├── headpose/ # Head pose estimation
|
||||
├── spoofing/ # Anti-spoofing
|
||||
├── privacy/ # Face anonymization
|
||||
├── types.py # Dataclasses (Face, GazeResult, etc.)
|
||||
├── stores/ # Vector stores (FAISS)
|
||||
├── types.py # Dataclasses (Face, GazeResult, HeadPoseResult, etc.)
|
||||
├── constants.py # Model weights and URLs
|
||||
├── model_store.py # Model download and caching
|
||||
├── onnx_utils.py # ONNX Runtime utilities
|
||||
└── visualization.py # Drawing utilities
|
||||
└── draw.py # Drawing utilities
|
||||
```
|
||||
|
||||
---
|
||||
@@ -120,7 +141,9 @@ A typical face analysis workflow:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace, ArcFace, AgeGender
|
||||
from uniface.attribute import AgeGender
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
|
||||
# 1. Initialize models
|
||||
detector = RetinaFace()
|
||||
@@ -139,7 +162,7 @@ for face in faces:
|
||||
embedding = recognizer.get_normalized_embedding(image, face.landmarks)
|
||||
|
||||
# Attributes
|
||||
attrs = age_gender.predict(image, face.bbox)
|
||||
attrs = age_gender.predict(image, face)
|
||||
|
||||
print(f"Face: {attrs.sex}, {attrs.age} years")
|
||||
```
|
||||
@@ -151,12 +174,20 @@ for face in faces:
|
||||
For convenience, `FaceAnalyzer` combines multiple modules:
|
||||
|
||||
```python
|
||||
from uniface import FaceAnalyzer
|
||||
from uniface.analyzer import FaceAnalyzer
|
||||
from uniface.attribute import AgeGender, FairFace
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
age_gender = AgeGender()
|
||||
fairface = FairFace()
|
||||
|
||||
analyzer = FaceAnalyzer(
|
||||
detect=True,
|
||||
recognize=True,
|
||||
attributes=True
|
||||
detector,
|
||||
recognizer=recognizer,
|
||||
attributes=[age_gender, fairface],
|
||||
)
|
||||
|
||||
faces = analyzer.analyze(image)
|
||||
@@ -170,7 +201,7 @@ for face in faces:
|
||||
## Model Lifecycle
|
||||
|
||||
1. **First use**: Model is downloaded from GitHub releases
|
||||
2. **Cached**: Stored in `~/.uniface/models/`
|
||||
2. **Cached**: Stored in `~/.uniface/models/` (configurable via `set_cache_dir()` or `UNIFACE_CACHE_DIR`)
|
||||
3. **Verified**: SHA-256 checksum validation
|
||||
4. **Loaded**: ONNX Runtime session created
|
||||
5. **Inference**: Hardware-accelerated execution
|
||||
@@ -179,6 +210,11 @@ for face in faces:
|
||||
# Models auto-download on first use
|
||||
detector = RetinaFace() # Downloads if not cached
|
||||
|
||||
# Optionally configure cache location
|
||||
from uniface.model_store import get_cache_dir, set_cache_dir
|
||||
set_cache_dir('/data/models')
|
||||
print(get_cache_dir()) # /data/models
|
||||
|
||||
# Or manually pre-download
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
@@ -11,7 +11,7 @@ This page explains how to tune detection and recognition thresholds for your use
|
||||
Controls minimum confidence for face detection:
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
# Default (balanced)
|
||||
detector = RetinaFace(confidence_threshold=0.5)
|
||||
@@ -81,7 +81,7 @@ For identity verification (same person check):
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
from uniface import compute_similarity
|
||||
from uniface.face_utils import compute_similarity
|
||||
|
||||
similarity = compute_similarity(embedding1, embedding2)
|
||||
|
||||
@@ -199,19 +199,13 @@ else:
|
||||
For drawing detections, filter by confidence:
|
||||
|
||||
```python
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
# Only draw high-confidence detections
|
||||
bboxes = [f.bbox for f in faces if f.confidence > 0.7]
|
||||
scores = [f.confidence for f in faces if f.confidence > 0.7]
|
||||
landmarks = [f.landmarks for f in faces if f.confidence > 0.7]
|
||||
from uniface.draw import draw_detections
|
||||
|
||||
# Only draw high-confidence detections (confidence ≥ vis_threshold)
|
||||
draw_detections(
|
||||
image=image,
|
||||
bboxes=bboxes,
|
||||
scores=scores,
|
||||
landmarks=landmarks,
|
||||
vis_threshold=0.6 # Additional visualization filter
|
||||
faces=faces,
|
||||
vis_threshold=0.7,
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
@@ -6,16 +6,20 @@ Thank you for contributing to UniFace!
|
||||
|
||||
## Quick Start
|
||||
|
||||
We use [uv](https://docs.astral.sh/uv/) for reproducible dev installs (lockfile-pinned).
|
||||
|
||||
```bash
|
||||
# Install uv first: https://docs.astral.sh/uv/getting-started/installation/
|
||||
|
||||
# Clone
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
|
||||
# Install dev dependencies
|
||||
pip install -e ".[dev]"
|
||||
# Install runtime + cpu + dev extras from uv.lock (--extra gpu for CUDA)
|
||||
uv sync --extra cpu --extra dev
|
||||
|
||||
# Run tests
|
||||
pytest
|
||||
uv run pytest
|
||||
```
|
||||
|
||||
---
|
||||
@@ -39,10 +43,43 @@ ruff check . --fix
|
||||
|
||||
## Pre-commit Hooks
|
||||
|
||||
`pre-commit` is included in the `[dev]` extra, so `uv sync` already installs it.
|
||||
|
||||
```bash
|
||||
pip install pre-commit
|
||||
pre-commit install
|
||||
pre-commit run --all-files
|
||||
uv run pre-commit install
|
||||
uv run pre-commit run --all-files
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Commit Messages
|
||||
|
||||
We follow [Conventional Commits](https://www.conventionalcommits.org/):
|
||||
|
||||
```
|
||||
<type>: <short description>
|
||||
```
|
||||
|
||||
| Type | When to use |
|
||||
|--------------|--------------------------------------------------|
|
||||
| **feat** | New feature or capability |
|
||||
| **fix** | Bug fix |
|
||||
| **docs** | Documentation changes |
|
||||
| **style** | Formatting, whitespace (no logic change) |
|
||||
| **refactor** | Code restructuring without changing behavior |
|
||||
| **perf** | Performance improvement |
|
||||
| **test** | Adding or updating tests |
|
||||
| **build** | Build system or dependencies |
|
||||
| **ci** | CI/CD and pre-commit configuration |
|
||||
| **chore** | Routine maintenance and tooling |
|
||||
|
||||
**Examples:**
|
||||
|
||||
```
|
||||
feat: Add gaze estimation model
|
||||
fix: Correct bounding box scaling for non-square images
|
||||
ci: Add nbstripout pre-commit hook
|
||||
docs: Update installation instructions
|
||||
```
|
||||
|
||||
---
|
||||
@@ -67,6 +104,14 @@ pre-commit run --all-files
|
||||
|
||||
---
|
||||
|
||||
## Releases
|
||||
|
||||
Releases are automated via GitHub Actions. Maintainers trigger **Actions → Release Pipeline → Run workflow** with a [PEP 440](https://peps.python.org/pep-0440/) version (e.g. `0.7.0`, `0.7.0rc1`). The pipeline runs tests, bumps `pyproject.toml` + `uniface/__init__.py`, tags the commit, publishes to PyPI, and creates a GitHub Release. Docs redeploy only for stable releases.
|
||||
|
||||
See [CONTRIBUTING.md](https://github.com/yakhyo/uniface/blob/main/CONTRIBUTING.md#release-process) for the full process.
|
||||
|
||||
---
|
||||
|
||||
## Questions?
|
||||
|
||||
Open an issue on [GitHub](https://github.com/yakhyo/uniface/issues).
|
||||
|
||||
384
docs/datasets.md
Normal file
@@ -0,0 +1,384 @@
|
||||
# Datasets
|
||||
|
||||
Overview of all training datasets and evaluation benchmarks used by UniFace models.
|
||||
|
||||
---
|
||||
|
||||
## Quick Reference
|
||||
|
||||
| Task | Dataset | Scale | Models |
|
||||
| ----------- | ------------------------------------------------ | ---------------------- | ------------------------------------------- |
|
||||
| Detection | [WIDER FACE](#wider-face) | 32K images | RetinaFace, SCRFD, YOLOv5-Face, YOLOv8-Face |
|
||||
| Recognition | [MS1MV2](#ms1mv2) | 5.8M images, 85.7K IDs | MobileFace, SphereFace |
|
||||
| Recognition | [WebFace600K](#webface600k) | 600K images | ArcFace |
|
||||
| Recognition | [WebFace4M / WebFace12M](#webface4m--webface12m) | 4M / 12M images | AdaFace |
|
||||
| Landmarks | [WFLW](#wflw) / [300W+CelebA](#300w--celeba) | 10K / 3.8K labeled + 202.6K unlabeled | PIPNet (98 / 68 pts) |
|
||||
| Gaze | [Gaze360](#gaze360) | 238 subjects | MobileGaze |
|
||||
| Parsing | [CelebAMask-HQ](#celebamask-hq) | 30K images | BiSeNet |
|
||||
| Attributes | [CelebA](#celeba) | 200K images | AgeGender |
|
||||
| Attributes | [FairFace](#fairface) | Balanced demographics | FairFace |
|
||||
| Attributes | [AffectNet](#affectnet) | Emotion labels | Emotion |
|
||||
|
||||
---
|
||||
|
||||
## Training Datasets
|
||||
|
||||
### Face Detection
|
||||
|
||||
#### WIDER FACE
|
||||
|
||||
Large-scale face detection benchmark with images across 61 event categories. Contains faces with a high degree of variability in scale, pose, occlusion, expression, and illumination.
|
||||
|
||||
| Property | Value |
|
||||
| -------- | ------------------------------------------- |
|
||||
| Images | ~32,000 (train/val/test split) |
|
||||
| Faces | ~394,000 annotated |
|
||||
| Subsets | Easy, Medium, Hard |
|
||||
| Used by | RetinaFace, SCRFD, YOLOv5-Face, YOLOv8-Face |
|
||||
|
||||
!!! info "Download & References"
|
||||
**Paper**: [WIDER FACE: A Face Detection Benchmark](https://arxiv.org/abs/1511.06523)
|
||||
|
||||
**Download**: [http://shuoyang1213.me/WIDERFACE/](http://shuoyang1213.me/WIDERFACE/)
|
||||
|
||||
---
|
||||
|
||||
### Face Recognition
|
||||
|
||||
#### MS1MV2
|
||||
|
||||
Refined version of the MS-Celeb-1M dataset, cleaned by InsightFace. Widely used for training face recognition models.
|
||||
|
||||
| Property | Value |
|
||||
| ---------- | ------------------------------ |
|
||||
| Identities | 85.7K |
|
||||
| Images | 5.8M |
|
||||
| Format | Aligned and cropped to 112x112 |
|
||||
| Used by | MobileFace, SphereFace |
|
||||
|
||||
!!! info "Download"
|
||||
**Kaggle (aligned 112x112)**: [ms1m-arcface-dataset](https://www.kaggle.com/datasets/yakhyokhuja/ms1m-arcface-dataset) (from InsightFace)
|
||||
|
||||
**Training code**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition)
|
||||
|
||||
---
|
||||
|
||||
#### WebFace600K
|
||||
|
||||
Medium-scale face recognition dataset from the WebFace series.
|
||||
|
||||
| Property | Value |
|
||||
| -------- | ------- |
|
||||
| Images | ~600K |
|
||||
| Used by | ArcFace |
|
||||
|
||||
!!! info "Source"
|
||||
**Origin**: [InsightFace](https://github.com/deepinsight/insightface)
|
||||
|
||||
**Paper**: [ArcFace: Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
|
||||
|
||||
---
|
||||
|
||||
#### WebFace4M / WebFace12M
|
||||
|
||||
Large-scale face recognition datasets from the WebFace260M collection. Used for training AdaFace models with adaptive quality-aware margin.
|
||||
|
||||
| Property | WebFace4M | WebFace12M |
|
||||
| -------- | ------------- | -------------- |
|
||||
| Images | ~4M | ~12M |
|
||||
| Used by | AdaFace IR_18 | AdaFace IR_101 |
|
||||
|
||||
!!! info "Source"
|
||||
**Paper**: [AdaFace: Quality Adaptive Margin for Face Recognition](https://arxiv.org/abs/2204.00964)
|
||||
|
||||
**Original code**: [mk-minchul/AdaFace](https://github.com/mk-minchul/AdaFace)
|
||||
|
||||
---
|
||||
|
||||
#### CASIA-WebFace
|
||||
|
||||
Smaller-scale face recognition dataset suitable for academic research and lighter training runs.
|
||||
|
||||
| Property | Value |
|
||||
| ---------- | ------------------------------ |
|
||||
| Identities | 10.6K |
|
||||
| Images | 491K |
|
||||
| Format | Aligned and cropped to 112x112 |
|
||||
| Used by | Alternative training set |
|
||||
|
||||
!!! info "Download"
|
||||
**Kaggle (aligned 112x112)**: [webface-112x112](https://www.kaggle.com/datasets/yakhyokhuja/webface-112x112) (from OpenSphere)
|
||||
|
||||
---
|
||||
|
||||
#### VGGFace2
|
||||
|
||||
Large-scale dataset with wide variations in pose, age, illumination, ethnicity, and profession.
|
||||
|
||||
| Property | Value |
|
||||
| ---------- | ------------------------------ |
|
||||
| Identities | 8.6K |
|
||||
| Images | 3.1M |
|
||||
| Format | Aligned and cropped to 112x112 |
|
||||
| Used by | Alternative training set |
|
||||
|
||||
!!! info "Download"
|
||||
**Kaggle (aligned 112x112)**: [vggface2-112x112](https://www.kaggle.com/datasets/yakhyokhuja/vggface2-112x112) (from OpenSphere)
|
||||
|
||||
---
|
||||
|
||||
### Facial Landmarks
|
||||
|
||||
#### WFLW
|
||||
|
||||
Wider Facial Landmarks in-the-Wild — a 98-point landmark dataset whose images come from
|
||||
WIDER FACE. Used to train the supervised PIPNet 98-point variant shipped with UniFace.
|
||||
|
||||
| Property | Value |
|
||||
| ---------- | -------------------------------------- |
|
||||
| Images | 10,000 (7,500 train / 2,500 test) |
|
||||
| Annotation | 98 manually labeled landmarks per face |
|
||||
| Used by | PIPNet WFLW-98 |
|
||||
|
||||
!!! info "Reference"
|
||||
**Project page**: [WFLW dataset](https://wywu.github.io/projects/LAB/WFLW.html)
|
||||
|
||||
---
|
||||
|
||||
#### 300W + CelebA
|
||||
|
||||
The 68-point PIPNet variant is trained in a generalizable semi-supervised setting (GSSL):
|
||||
labeled images come from 300W and unlabeled images come from CelebA.
|
||||
|
||||
| Property | Value |
|
||||
| --------------- | -------------------------------------------------------------------------------- |
|
||||
| Labeled images | 3,837 (3,148 train: LFPW train + HELEN train + AFW; 689 test: LFPW test + HELEN test + iBUG) |
|
||||
| Unlabeled | 202,599 (full CelebA; bounding boxes from RetinaFace per the PIPNet paper) |
|
||||
| Annotation | 68-point iBUG layout |
|
||||
| Used by | PIPNet 300W+CelebA-68 |
|
||||
|
||||
!!! info "Reference"
|
||||
**Paper**: [PIPNet (Pixel-in-Pixel Net)](https://arxiv.org/abs/2003.03771) (IJCV 2021)
|
||||
|
||||
---
|
||||
|
||||
### Gaze Estimation
|
||||
|
||||
#### Gaze360
|
||||
|
||||
Large-scale gaze estimation dataset collected in indoor and outdoor environments with diverse head poses and wide gaze ranges (up to 360 degrees).
|
||||
|
||||
| Property | Value |
|
||||
| ----------- | --------------------- |
|
||||
| Subjects | 238 |
|
||||
| Environment | Indoor and outdoor |
|
||||
| Used by | All MobileGaze models |
|
||||
|
||||
!!! info "Download & Preprocessing"
|
||||
**Download**: [gaze360.csail.mit.edu/download.php](https://gaze360.csail.mit.edu/download.php)
|
||||
|
||||
**Preprocessing**: [GazeHub - Gaze360](https://phi-ai.buaa.edu.cn/Gazehub/3D-dataset/#gaze360)
|
||||
|
||||
!!! note "UniFace Models"
|
||||
All MobileGaze models shipped with UniFace are trained exclusively on Gaze360 for 200 epochs.
|
||||
|
||||
**Dataset structure:**
|
||||
|
||||
```
|
||||
data/
|
||||
└── Gaze360/
|
||||
├── Image/
|
||||
└── Label/
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
#### MPIIFaceGaze
|
||||
|
||||
Dataset for appearance-based gaze estimation, collected from laptop webcams while participants went about everyday laptop use. It is supported by the gaze estimation training code but was not used to train the UniFace pretrained weights.
|
||||
|
||||
| Property | Value |
|
||||
| ----------- | ---------------------------------------- |
|
||||
| Subjects | 15 |
|
||||
| Environment | Everyday laptop usage |
|
||||
| Used by | Supported (not used for UniFace weights) |
|
||||
|
||||
!!! info "Download & Preprocessing"
|
||||
**Download**: [MPIIFaceGaze download page](https://www.mpi-inf.mpg.de/departments/computer-vision-and-machine-learning/research/gaze-based-human-computer-interaction/its-written-all-over-your-face-full-face-appearance-based-gaze-estimation)
|
||||
|
||||
**Preprocessing**: [GazeHub - MPIIFaceGaze](https://phi-ai.buaa.edu.cn/Gazehub/3D-dataset/#mpiifacegaze)
|
||||
|
||||
**Dataset structure:**
|
||||
|
||||
```
|
||||
data/
|
||||
└── MPIIFaceGaze/
|
||||
├── Image/
|
||||
└── Label/
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Head Pose Estimation
|
||||
|
||||
#### 300W-LP
|
||||
|
||||
Large-scale synthesized face dataset with large pose variations, generated from 300W by face profiling. Used for training head pose estimation models.
|
||||
|
||||
| Property | Value |
|
||||
| ----------- | ----------------------------- |
|
||||
| Images | ~122,000 (synthesized) |
|
||||
| Source | 300W (profiled) |
|
||||
| Pose range | ±90° yaw |
|
||||
| Evaluation | AFLW2000 |
|
||||
| Used by | All HeadPose models |
|
||||
|
||||
!!! info "Download & Reference"
|
||||
**Paper**: [Face Alignment Across Large Poses: A 3D Solution](https://arxiv.org/abs/1511.07212)
|
||||
|
||||
**Training code**: [yakhyo/head-pose-estimation](https://github.com/yakhyo/head-pose-estimation)
|
||||
|
||||
!!! note "UniFace Models"
|
||||
All HeadPose models shipped with UniFace are trained on 300W-LP and evaluated on AFLW2000.
|
||||
|
||||
---
|
||||
|
||||
### Face Parsing
|
||||
|
||||
#### CelebAMask-HQ
|
||||
|
||||
High-quality face parsing dataset with pixel-level annotations for 19 facial component classes.
|
||||
|
||||
| Property | Value |
|
||||
| ---------- | ---------------------------- |
|
||||
| Images | 30,000 |
|
||||
| Classes | 19 facial components |
|
||||
| Resolution | High quality |
|
||||
| Used by | BiSeNet (ResNet18, ResNet34) |
|
||||
|
||||
!!! info "Source"
|
||||
**GitHub**: [switchablenorms/CelebAMask-HQ](https://github.com/switchablenorms/CelebAMask-HQ)
|
||||
|
||||
**Training code**: [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing)
|
||||
|
||||
**Dataset structure:**
|
||||
|
||||
```
|
||||
dataset/
|
||||
├── images/ # Input face images
|
||||
│ ├── image1.jpg
|
||||
│ └── ...
|
||||
└── labels/ # Segmentation masks
|
||||
├── image1.png
|
||||
└── ...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Attribute Analysis
|
||||
|
||||
#### CelebA
|
||||
|
||||
Large-scale face attributes dataset widely used for training age and gender prediction models.
|
||||
|
||||
| Property | Value |
|
||||
| ---------- | -------------------- |
|
||||
| Images | ~200K |
|
||||
| Attributes | 40 binary attributes |
|
||||
| Used by | AgeGender |
|
||||
|
||||
!!! info "Reference"
|
||||
**Paper**: [Deep Learning Face Attributes in the Wild](https://arxiv.org/abs/1411.7766)
|
||||
|
||||
---
|
||||
|
||||
#### FairFace
|
||||
|
||||
Face attribute dataset designed for balanced representation across race, gender, and age groups. Models trained on it tend to produce more consistent accuracy across demographic groups than models trained on imbalanced datasets.
|
||||
|
||||
| Property | Value |
|
||||
| ---------- | ----------------------------------- |
|
||||
| Attributes | Race (7), Gender (2), Age Group (9) |
|
||||
| Used by | FairFace |
|
||||
| License | CC BY 4.0 |
|
||||
|
||||
!!! info "Reference"
|
||||
**Paper**: [FairFace: Face Attribute Dataset for Balanced Race, Gender, and Age](https://arxiv.org/abs/1908.04913)
|
||||
|
||||
**ONNX inference**: [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx)
|
||||
|
||||
---
|
||||
|
||||
#### AffectNet
|
||||
|
||||
Large-scale facial expression dataset for emotion recognition training.
|
||||
|
||||
| Property | Value |
|
||||
| -------- | ----------------------------------------------------------------------- |
|
||||
| Classes | 7 or 8 (Neutral, Happy, Sad, Surprise, Fear, Disgust, Angry + Contempt) |
|
||||
| Used by | Emotion (AFFECNET7, AFFECNET8) |
|
||||
|
||||
!!! info "Reference"
|
||||
**Paper**: [AffectNet: A Database for Facial Expression, Valence, and Arousal Computing in the Wild](https://ieeexplore.ieee.org/document/8013713)
|
||||
|
||||
---
|
||||
|
||||
## Evaluation Benchmarks
|
||||
|
||||
### Face Detection
|
||||
|
||||
#### WIDER FACE Validation Set
|
||||
|
||||
The standard benchmark for face detection models. Results are reported across three difficulty subsets.
|
||||
|
||||
| Subset | Criteria |
|
||||
| ------ | --------------------------------------------- |
|
||||
| Easy | Large, clear, unoccluded faces |
|
||||
| Medium | Moderate scale and occlusion |
|
||||
| Hard | Small, heavily occluded, or challenging faces |
|
||||
|
||||
See [Model Zoo - Detection](models.md#face-detection-models) for per-model accuracy on each subset.
|
||||
|
||||
---
|
||||
|
||||
### Face Recognition
|
||||
|
||||
Recognition models are evaluated across multiple benchmarks. Aligned 112x112 validation datasets are available as a single download.
|
||||
|
||||
!!! info "Download"
|
||||
**Kaggle**: [agedb-30-calfw-cplfw-lfw-aligned-112x112](https://www.kaggle.com/datasets/yakhyokhuja/agedb-30-calfw-cplfw-lfw-aligned-112x112)
|
||||
|
||||
| Benchmark | Description | Used by |
|
||||
| ------------ | ----------------------------------------------------------------- | ------------------------------- |
|
||||
| **LFW** | Labeled Faces in the Wild - standard face verification benchmark | ArcFace, MobileFace, SphereFace |
|
||||
| **CALFW** | Cross-Age LFW - face verification across age gaps | MobileFace, SphereFace |
|
||||
| **CPLFW** | Cross-Pose LFW - face verification across pose variations | MobileFace, SphereFace |
|
||||
| **AgeDB-30** | Age database with 30-year age gaps | ArcFace, MobileFace, SphereFace |
|
||||
| **CFP-FP** | Celebrities in Frontal-Profile - frontal vs. profile verification | ArcFace |
|
||||
| **IJB-B** | IARPA Janus Benchmark B - TAR@FAR=0.01% | AdaFace |
|
||||
| **IJB-C** | IARPA Janus Benchmark C - TAR@FAR=1e-4 | AdaFace, ArcFace |
|
||||
|
||||
See [Model Zoo - Recognition](models.md#face-recognition-models) for per-model accuracy on each benchmark.
|
||||
|
||||
---
|
||||
|
||||
### Gaze Estimation
|
||||
|
||||
| Benchmark | Metric | Description |
|
||||
| -------------------- | ------------- | -------------------------------------------- |
|
||||
| **Gaze360 test set** | MAE (degrees) | Mean Absolute Error in gaze angle prediction |
|
||||
|
||||
See [Model Zoo - Gaze](models.md#gaze-estimation-models) for per-model MAE scores.
|
||||
|
||||
---
|
||||
|
||||
## Training Repositories
|
||||
|
||||
For training your own models or reproducing results, see the following repositories:
|
||||
|
||||
| Task | Repository | Datasets Supported |
|
||||
| ----------- | ------------------------------------------------------------------------- | ------------------------------- |
|
||||
| Detection | [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) | WIDER FACE |
|
||||
| Recognition | [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) | MS1MV2, CASIA-WebFace, VGGFace2 |
|
||||
| Gaze | [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation) | Gaze360, MPIIFaceGaze |
|
||||
| Parsing | [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing) | CelebAMask-HQ |
|
||||
@@ -10,12 +10,17 @@ template: home.html
|
||||
|
||||
# UniFace { .hero-title }
|
||||
|
||||
<p class="hero-subtitle">A lightweight, production-ready face analysis library built on ONNX Runtime</p>
|
||||
<p class="hero-subtitle">A Unified Face Analysis Library for Python</p>
|
||||
|
||||
[](https://pypi.org/project/uniface/)
|
||||
[](https://www.python.org/)
|
||||
[](https://pypi.org/project/uniface/)
|
||||
[](https://www.python.org/)
|
||||
[](https://opensource.org/licenses/MIT)
|
||||
[](https://github.com/yakhyo/uniface/actions)
|
||||
[](https://pepy.tech/projects/uniface)
|
||||
[](https://www.kaggle.com/yakhyokhuja/code)
|
||||
[](https://discord.gg/wdzrjr7R5j)
|
||||
|
||||
<!-- <img src="https://raw.githubusercontent.com/yakhyo/uniface/main/.github/logos/uniface_rounded_q80.webp" alt="UniFace - A Unified Face Analysis Library for Python" style="max-width: 70%; margin: 1rem 0;"> -->
|
||||
|
||||
[Get Started](quickstart.md){ .md-button .md-button--primary }
|
||||
[View on GitHub](https://github.com/yakhyo/uniface){ .md-button }
|
||||
@@ -26,17 +31,17 @@ template: home.html
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-face-recognition: Face Detection
|
||||
ONNX-optimized detectors (RetinaFace, SCRFD, YOLO) with 5-point landmarks.
|
||||
RetinaFace, SCRFD, and YOLO detectors with 5-point landmarks.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-account-check: Face Recognition
|
||||
AdaFace, ArcFace, MobileFace, and SphereFace embeddings for identity verification.
|
||||
AdaFace, ArcFace, EdgeFace, MobileFace, and SphereFace embeddings for identity verification.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-map-marker: Landmarks
|
||||
Accurate 106-point facial landmark localization for detailed face analysis.
|
||||
Dense facial landmark localization — 106-point (2d106det) and 98 / 68-point (PIPNet) variants.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
@@ -54,6 +59,16 @@ BiSeNet semantic segmentation with 19 facial component classes.
|
||||
Real-time gaze direction prediction with MobileGaze models.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-axis-arrow: Head Pose
|
||||
3D head orientation (pitch, yaw, roll) estimation with 6D rotation models.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-motion-play: Tracking
|
||||
Multi-object tracking with BYTETracker for persistent face IDs across video frames.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-shield-check: Anti-Spoofing
|
||||
Face liveness detection with MiniFASNet to prevent fraud.
|
||||
@@ -64,31 +79,35 @@ Face liveness detection with MiniFASNet to prevent fraud.
|
||||
Face anonymization with 5 blur methods for privacy protection.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-database-search: Vector Indexing
|
||||
FAISS-backed embedding store for fast multi-identity face search.
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## Installation
|
||||
|
||||
=== "Standard"
|
||||
UniFace uses portable model runtimes for consistent inference across macOS, Linux, and Windows. Most core components run through **ONNX Runtime**, while optional components may use **PyTorch** where appropriate.
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
**CPU / Apple Silicon**
|
||||
```bash
|
||||
pip install uniface[cpu]
|
||||
```
|
||||
|
||||
=== "GPU (CUDA)"
|
||||
**GPU (NVIDIA CUDA)**
|
||||
```bash
|
||||
pip install uniface[gpu]
|
||||
```
|
||||
|
||||
```bash
|
||||
pip install uniface[gpu]
|
||||
```
|
||||
|
||||
=== "From Source"
|
||||
|
||||
```bash
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
pip install -e .
|
||||
```
|
||||
**From Source**
|
||||
```bash
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
pip install -e ".[cpu]" # or .[gpu] for CUDA
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -11,15 +11,27 @@ This guide covers all installation options for UniFace.
|
||||
|
||||
---
|
||||
|
||||
## Why Two Extras?
|
||||
|
||||
`onnxruntime` (CPU) and `onnxruntime-gpu` (CUDA) both install into the same `onnxruntime` Python import namespace.
|
||||
Installing both at the same time causes file conflicts and silent provider mismatches.
|
||||
UniFace exposes them as separate, mutually exclusive extras so you install exactly one.
|
||||
|
||||
---
|
||||
|
||||
## Quick Install
|
||||
|
||||
The simplest way to install UniFace:
|
||||
=== "CPU / Apple Silicon"
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
```bash
|
||||
pip install uniface[cpu]
|
||||
```
|
||||
|
||||
This installs the CPU version with all core dependencies.
|
||||
=== "NVIDIA GPU (CUDA)"
|
||||
|
||||
```bash
|
||||
pip install uniface[gpu]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
@@ -27,14 +39,16 @@ This installs the CPU version with all core dependencies.
|
||||
|
||||
### macOS (Apple Silicon - M1/M2/M3/M4)
|
||||
|
||||
For Apple Silicon Macs, the standard installation automatically includes ARM64 optimizations:
|
||||
The `[cpu]` extra pulls in the standard `onnxruntime` package, which has native ARM64 support
|
||||
built in since version 1.13. No additional setup is needed for CoreML acceleration.
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
pip install uniface[cpu]
|
||||
```
|
||||
|
||||
!!! tip "Native Performance"
|
||||
The base `onnxruntime` package has native Apple Silicon support with ARM64 optimizations built-in since version 1.13+. No additional configuration needed.
|
||||
`onnxruntime` 1.13+ includes ARM64 optimizations out of the box.
|
||||
UniFace automatically detects and enables `CoreMLExecutionProvider` on Apple Silicon.
|
||||
|
||||
Verify ARM64 installation:
|
||||
|
||||
@@ -47,19 +61,22 @@ python -c "import platform; print(platform.machine())"
|
||||
|
||||
### Linux/Windows with NVIDIA GPU
|
||||
|
||||
For CUDA acceleration on NVIDIA GPUs:
|
||||
|
||||
```bash
|
||||
pip install uniface[gpu]
|
||||
```
|
||||
|
||||
This installs `onnxruntime-gpu`, which includes both `CUDAExecutionProvider` and
|
||||
`CPUExecutionProvider` — no separate CPU package is needed.
|
||||
|
||||
**Requirements:**
|
||||
|
||||
- CUDA 11.x or 12.x
|
||||
- NVIDIA driver compatible with your CUDA version
|
||||
- CUDA 11.x or 12.x toolkit
|
||||
- cuDNN 8.x
|
||||
|
||||
!!! info "CUDA Compatibility"
|
||||
See [ONNX Runtime GPU requirements](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html) for detailed compatibility matrix.
|
||||
See the [ONNX Runtime GPU compatibility matrix](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html)
|
||||
for matching CUDA and cuDNN versions.
|
||||
|
||||
Verify GPU installation:
|
||||
|
||||
@@ -74,7 +91,7 @@ print("Available providers:", ort.get_available_providers())
|
||||
### CPU-Only (All Platforms)
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
pip install uniface[cpu]
|
||||
```
|
||||
|
||||
Works on all platforms with automatic CPU fallback.
|
||||
@@ -88,29 +105,60 @@ For development or the latest features:
|
||||
```bash
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
pip install -e .
|
||||
|
||||
pip install -e ".[cpu]" # CPU / Apple Silicon
|
||||
pip install -e ".[gpu]" # NVIDIA GPU
|
||||
```
|
||||
|
||||
With development dependencies:
|
||||
|
||||
```bash
|
||||
pip install -e ".[dev]"
|
||||
pip install -e ".[cpu,dev]"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## FAISS Vector Store
|
||||
|
||||
For fast multi-identity face search using a FAISS vector store:
|
||||
|
||||
```bash
|
||||
pip install faiss-cpu # CPU
|
||||
pip install faiss-gpu # NVIDIA GPU (CUDA)
|
||||
```
|
||||
|
||||
See the [Stores module](modules/stores.md) for usage.
|
||||
|
||||
---
|
||||
|
||||
## Dependencies
|
||||
|
||||
UniFace has minimal dependencies:
|
||||
UniFace has minimal core dependencies:
|
||||
|
||||
| Package | Purpose |
|
||||
|---------|---------|
|
||||
| `numpy` | Array operations |
|
||||
| `opencv-python` | Image processing |
|
||||
| `onnxruntime` | Model inference |
|
||||
| `scikit-image` | Geometric transforms |
|
||||
| `scipy` | Signal processing |
|
||||
| `requests` | Model download |
|
||||
| `tqdm` | Progress bars |
|
||||
|
||||
**Runtime extras (install exactly one):**
|
||||
|
||||
| Extra | Package | Use case |
|
||||
|-------|---------|---------|
|
||||
| `uniface[cpu]` | `onnxruntime` | CPU inference, Apple Silicon |
|
||||
| `uniface[gpu]` | `onnxruntime-gpu` | NVIDIA CUDA inference |
|
||||
|
||||
**Other optional packages:**
|
||||
|
||||
| Package | Install | Purpose |
|
||||
|---------|---------|---------|
|
||||
| `faiss-cpu` / `faiss-gpu` | `pip install faiss-cpu` | FAISS vector store |
|
||||
| `torch` | `pip install torch` | Emotion model (TorchScript) |
|
||||
| `torchvision` | `pip install torchvision` | Faster NMS for YOLO detectors |
|
||||
|
||||
---
|
||||
|
||||
## Verify Installation
|
||||
@@ -126,24 +174,88 @@ import onnxruntime as ort
|
||||
print(f"Available providers: {ort.get_available_providers()}")
|
||||
|
||||
# Quick test
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
detector = RetinaFace()
|
||||
print("Installation successful!")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Upgrading
|
||||
|
||||
When upgrading UniFace, stay consistent with your runtime extra:
|
||||
|
||||
```bash
|
||||
pip install --upgrade uniface[cpu] # or uniface[gpu]
|
||||
```
|
||||
|
||||
If you are switching from CPU to GPU (or vice versa):
|
||||
|
||||
```bash
|
||||
pip uninstall onnxruntime onnxruntime-gpu -y
|
||||
pip install uniface[gpu] # install the one you want
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Pre-release Versions
|
||||
|
||||
UniFace ships release candidates and betas to PyPI ahead of stable releases (versions like `0.7.0rc1`, `0.7.0b1`, `0.7.0a1`). These let you try upcoming features before they're finalized.
|
||||
|
||||
`pip install uniface` always installs the latest **stable** release. To opt in to pre-releases:
|
||||
|
||||
```bash
|
||||
# Latest pre-release (if newer than latest stable)
|
||||
pip install uniface[cpu] --pre
|
||||
|
||||
# A specific pre-release
|
||||
pip install uniface[cpu]==0.7.0rc1
|
||||
```
|
||||
|
||||
Pre-releases are not recommended for production — APIs may still change before the stable release.
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### onnxruntime Not Found
|
||||
|
||||
If you see:
|
||||
|
||||
```
|
||||
ImportError: onnxruntime is not installed. Install it with one of:
|
||||
pip install uniface[cpu] # CPU / Apple Silicon
|
||||
pip install uniface[gpu] # NVIDIA GPU (CUDA)
|
||||
```
|
||||
|
||||
You installed uniface without an extra. Run the appropriate command above.
|
||||
|
||||
---
|
||||
|
||||
### Both onnxruntime and onnxruntime-gpu Installed
|
||||
|
||||
If you previously ran `pip install uniface[gpu]` on top of a `pip install uniface[cpu]`
|
||||
(or vice versa), you may have both packages installed simultaneously, which causes conflicts.
|
||||
Fix it with:
|
||||
|
||||
```bash
|
||||
pip uninstall onnxruntime onnxruntime-gpu -y
|
||||
pip install uniface[gpu] # or uniface[cpu]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Import Errors
|
||||
|
||||
If you encounter import errors, ensure you're using Python 3.10+:
|
||||
Ensure you're using Python 3.10+:
|
||||
|
||||
```bash
|
||||
python --version
|
||||
# Should show: Python 3.10.x or higher
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Model Download Issues
|
||||
|
||||
Models are automatically downloaded on first use. If downloads fail:
|
||||
@@ -157,6 +269,25 @@ model_path = verify_model_weights(RetinaFaceWeights.MNET_V2)
|
||||
print(f"Model downloaded to: {model_path}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### CUDA Not Detected
|
||||
|
||||
1. Verify CUDA installation:
|
||||
```bash
|
||||
nvidia-smi
|
||||
```
|
||||
|
||||
2. Check CUDA version compatibility with ONNX Runtime.
|
||||
|
||||
3. Reinstall the GPU extra cleanly:
|
||||
```bash
|
||||
pip uninstall onnxruntime onnxruntime-gpu -y
|
||||
pip install uniface[gpu]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Performance Issues on Mac
|
||||
|
||||
Verify you're using the ARM64 build (not x86_64 via Rosetta):
|
||||
|
||||
@@ -20,5 +20,7 @@ UniFace is released under the [MIT License](https://opensource.org/licenses/MIT)
|
||||
| SphereFace | [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) | MIT |
|
||||
| BiSeNet | [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing) | MIT |
|
||||
| MobileGaze | [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation) | MIT |
|
||||
| MODNet | [yakhyo/modnet](https://github.com/yakhyo/modnet) | Apache-2.0 |
|
||||
| MiniFASNet | [yakhyo/face-anti-spoofing](https://github.com/yakhyo/face-anti-spoofing) | Apache-2.0 |
|
||||
| FairFace | [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx) | CC BY 4.0 |
|
||||
| PIPNet | [yakhyo/pipnet-onnx](https://github.com/yakhyo/pipnet-onnx) — meanface tables vendored from [jhb86253817/PIPNet](https://github.com/jhb86253817/PIPNet) | MIT |
|
||||
|
||||
187
docs/models.md
@@ -8,7 +8,7 @@ Complete guide to all available models and their performance characteristics.
|
||||
|
||||
### RetinaFace Family
|
||||
|
||||
RetinaFace models are trained on the WIDER FACE dataset.
|
||||
RetinaFace models are trained on the [WIDER FACE](datasets.md#wider-face) dataset.
|
||||
|
||||
| Model Name | Params | Size | Easy | Medium | Hard |
|
||||
| -------------- | ------ | ----- | ------ | ------ | ------ |
|
||||
@@ -22,29 +22,29 @@ RetinaFace models are trained on the WIDER FACE dataset.
|
||||
!!! info "Accuracy & Benchmarks"
|
||||
**Accuracy**: WIDER FACE validation set (Easy/Medium/Hard subsets) - from [RetinaFace paper](https://arxiv.org/abs/1905.00641)
|
||||
|
||||
**Speed**: Benchmark on your own hardware using `python tools/detection.py --source <image> --iterations 100`
|
||||
**Speed**: Benchmark on your own hardware using `python tools/detect.py --source <image>`
|
||||
|
||||
---
|
||||
|
||||
### SCRFD Family
|
||||
|
||||
SCRFD (Sample and Computation Redistribution for Efficient Face Detection) models trained on WIDER FACE dataset.
|
||||
SCRFD (Sample and Computation Redistribution for Efficient Face Detection) models trained on [WIDER FACE](datasets.md#wider-face) dataset.
|
||||
|
||||
| Model Name | Params | Size | Easy | Medium | Hard |
|
||||
| ---------------- | ------ | ----- | ------ | ------ | ------ |
|
||||
| `SCRFD_500M` | 0.6M | 2.5MB | 90.57% | 88.12% | 68.51% |
|
||||
| `SCRFD_10G` :material-check-circle: | 4.2M | 17MB | 95.16% | 93.87% | 83.05% |
|
||||
| `SCRFD_500M_KPS` | 0.6M | 2.5MB | 90.57% | 88.12% | 68.51% |
|
||||
| `SCRFD_10G_KPS` :material-check-circle: | 4.2M | 17MB | 95.16% | 93.87% | 83.05% |
|
||||
|
||||
!!! info "Accuracy & Benchmarks"
|
||||
**Accuracy**: WIDER FACE validation set - from [SCRFD paper](https://arxiv.org/abs/2105.04714)
|
||||
|
||||
**Speed**: Benchmark on your own hardware using `python tools/detection.py --source <image> --iterations 100`
|
||||
**Speed**: Benchmark on your own hardware using `python tools/detect.py --source <image>`
|
||||
|
||||
---
|
||||
|
||||
### YOLOv5-Face Family
|
||||
|
||||
YOLOv5-Face models provide detection with 5-point facial landmarks, trained on WIDER FACE dataset.
|
||||
YOLOv5-Face models provide detection with 5-point facial landmarks, trained on [WIDER FACE](datasets.md#wider-face) dataset.
|
||||
|
||||
| Model Name | Size | Easy | Medium | Hard |
|
||||
| -------------- | ---- | ------ | ------ | ------ |
|
||||
@@ -55,7 +55,7 @@ YOLOv5-Face models provide detection with 5-point facial landmarks, trained on W
|
||||
!!! info "Accuracy & Benchmarks"
|
||||
**Accuracy**: WIDER FACE validation set - from [YOLOv5-Face paper](https://arxiv.org/abs/2105.12931)
|
||||
|
||||
**Speed**: Benchmark on your own hardware using `python tools/detection.py --source <image> --iterations 100`
|
||||
**Speed**: Benchmark on your own hardware using `python tools/detect.py --source <image>`
|
||||
|
||||
!!! note "Fixed Input Size"
|
||||
All YOLOv5-Face models use a fixed input size of 640×640.
|
||||
@@ -74,7 +74,7 @@ YOLOv8-Face models use anchor-free design with DFL (Distribution Focal Loss) for
|
||||
!!! info "Accuracy & Benchmarks"
|
||||
**Accuracy**: WIDER FACE validation set (Easy/Medium/Hard subsets)
|
||||
|
||||
**Speed**: Benchmark on your own hardware using `python tools/detection.py --source <image> --method yolov8face`
|
||||
**Speed**: Benchmark on your own hardware using `python tools/detect.py --source <image> --method yolov8face`
|
||||
|
||||
!!! note "Fixed Input Size"
|
||||
All YOLOv8-Face models use a fixed input size of 640×640.
|
||||
@@ -93,7 +93,7 @@ Face recognition using adaptive margin based on image quality.
|
||||
| `IR_101` | IR-101 | WebFace12M | 249 MB | - | 97.66% |
|
||||
|
||||
!!! info "Training Data & Accuracy"
|
||||
**Dataset**: WebFace4M (4M images) / WebFace12M (12M images)
|
||||
**Dataset**: [WebFace4M / WebFace12M](datasets.md#webface4m--webface12m) (4M / 12M images)
|
||||
|
||||
**Accuracy**: IJB-B and IJB-C benchmarks, TAR@FAR=0.01%
|
||||
|
||||
@@ -113,7 +113,7 @@ Face recognition using additive angular margin loss.
|
||||
| `RESNET` | ResNet50 | 43.6M | 166MB | 99.83% | 99.33% | 98.23% | 97.25% |
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on WebFace600K (600K images)
|
||||
**Dataset**: Trained on [WebFace600K](datasets.md#webface600k) (600K images)
|
||||
|
||||
**Accuracy**: IJB-C accuracy reported as TAR@FAR=1e-4
|
||||
|
||||
@@ -131,7 +131,7 @@ Lightweight face recognition models with MobileNet backbones.
|
||||
| `MNET_V3_LARGE` | MobileNetV3-L | 3.52M | 10MB | 99.53% | 94.56% | 86.79% | 95.13% |
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
|
||||
**Dataset**: Trained on [MS1MV2](datasets.md#ms1mv2) (5.8M images, 85K identities)
|
||||
|
||||
**Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks
|
||||
|
||||
@@ -147,7 +147,7 @@ Face recognition using angular softmax loss.
|
||||
| `SPHERE36` | Sphere36 | 34.6M | 92MB | 99.72% | 95.64% | 89.92% | 96.83% |
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
|
||||
**Dataset**: Trained on [MS1MV2](datasets.md#ms1mv2) (5.8M images, 85K identities)
|
||||
|
||||
**Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks
|
||||
|
||||
@@ -156,6 +156,24 @@ Face recognition using angular softmax loss.
|
||||
|
||||
---
|
||||
|
||||
### EdgeFace
|
||||
|
||||
Efficient face recognition designed for edge devices, using EdgeNeXt backbone with optional LoRA compression.
|
||||
|
||||
| Model Name | Backbone | Params | MFLOPs | Size | LFW | CALFW | CPLFW | CFP-FP | AgeDB-30 |
|
||||
| --------------- | -------- | ------ | ------ | ----- | ------ | ------ | ------ | ------ | -------- |
|
||||
| `XXS` :material-check-circle: | EdgeNeXt | 1.24M | 94 | ~5 MB | 99.57% | 94.83% | 90.27% | 93.63% | 94.92% |
|
||||
| `XS_GAMMA_06` | EdgeNeXt | 1.77M | 154 | ~7 MB | 99.73% | 95.28% | 91.58% | 94.71% | 96.08% |
|
||||
| `S_GAMMA_05` | EdgeNeXt | 3.65M | 306 | ~14 MB | 99.78% | 95.55% | 92.48% | 95.74% | 97.03% |
|
||||
| `BASE` | EdgeNeXt | 18.2M | 1399 | ~70 MB | 99.83% | 96.07% | 93.75% | 97.01% | 97.60% |
|
||||
|
||||
!!! info "Training Data & Reference"
|
||||
**Paper**: [EdgeFace: Efficient Face Recognition Model for Edge Devices](https://arxiv.org/abs/2307.01838v2) (IEEE T-BIOM 2024)
|
||||
|
||||
**Source**: [github.com/otroshi/edgeface](https://github.com/otroshi/edgeface) | [github.com/yakhyo/edgeface-onnx](https://github.com/yakhyo/edgeface-onnx)
|
||||
|
||||
---
|
||||
|
||||
## Facial Landmark Models
|
||||
|
||||
### 106-Point Landmark Detection
|
||||
@@ -178,6 +196,26 @@ Facial landmark localization model.
|
||||
|
||||
---
|
||||
|
||||
### PIPNet (98 / 68 points)
|
||||
|
||||
PIPNet (Pixel-in-Pixel Net) facial landmark detector. ResNet-18 backbone, 256×256 input.
|
||||
|
||||
| Model Name | Points | Backbone | Dataset | Size |
|
||||
| ---------- | ------ | -------- | ------- | ---- |
|
||||
| `WFLW_98` :material-check-circle: | 98 | ResNet-18 | WFLW (supervised) | 47 MB |
|
||||
| `DW300_CELEBA_68` | 68 | ResNet-18 | 300W+CelebA (GSSL) | 46 MB |
|
||||
|
||||
!!! info "Reference"
|
||||
**Paper**: [PIPNet: Towards Efficient Facial Landmark Detection in the Wild](https://arxiv.org/abs/2003.03771) (IJCV 2021)
|
||||
|
||||
**Source**: [yakhyo/pipnet-onnx](https://github.com/yakhyo/pipnet-onnx) — ONNX export from [jhb86253817/PIPNet](https://github.com/jhb86253817/PIPNet)
|
||||
|
||||
!!! note "Auto-selected meanface"
|
||||
Both variants share the same architecture; the number of landmarks (and the matching
|
||||
meanface table) is inferred from the ONNX output channel count.
|
||||
|
||||
---
|
||||
|
||||
## Attribute Analysis Models
|
||||
|
||||
### Age & Gender Detection
|
||||
@@ -187,7 +225,7 @@ Facial landmark localization model.
|
||||
| `AgeGender` | Age, Gender | 2.1M | 8MB |
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on CelebA
|
||||
**Dataset**: Trained on [CelebA](datasets.md#celeba)
|
||||
|
||||
!!! warning "Accuracy Note"
|
||||
Accuracy varies by demographic and image quality. Test on your specific use case.
|
||||
@@ -201,7 +239,7 @@ Facial landmark localization model.
|
||||
| `FairFace` | Race, Gender, Age Group | - | 44MB |
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on FairFace dataset with balanced demographics
|
||||
**Dataset**: Trained on [FairFace](datasets.md#fairface) dataset with balanced demographics
|
||||
|
||||
!!! tip "Equitable Predictions"
|
||||
FairFace provides more equitable predictions across different racial and gender groups.
|
||||
@@ -219,12 +257,12 @@ Facial landmark localization model.
|
||||
| `AFFECNET7` | 7 | 0.5M | 2MB |
|
||||
| `AFFECNET8` | 8 | 0.5M | 2MB |
|
||||
|
||||
**Classes (7)**: Neutral, Happy, Sad, Surprise, Fear, Disgust, Anger
|
||||
**Classes (7)**: Neutral, Happy, Sad, Surprise, Fear, Disgust, Angry
|
||||
|
||||
**Classes (8)**: Above + Contempt
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on AffectNet
|
||||
**Dataset**: Trained on [AffectNet](datasets.md#affectnet)
|
||||
|
||||
!!! note "Accuracy Note"
|
||||
Emotion detection accuracy depends heavily on facial expression clarity and cultural context.
|
||||
@@ -235,7 +273,7 @@ Facial landmark localization model.
|
||||
|
||||
### MobileGaze Family
|
||||
|
||||
Gaze direction prediction models trained on Gaze360 dataset. Returns pitch (vertical) and yaw (horizontal) angles in radians.
|
||||
Gaze direction prediction models trained on [Gaze360](datasets.md#gaze360) dataset. Returns pitch (vertical) and yaw (horizontal) angles in radians.
|
||||
|
||||
| Model Name | Params | Size | MAE* |
|
||||
| -------------- | ------ | ------- | ----- |
|
||||
@@ -248,7 +286,7 @@ Gaze direction prediction models trained on Gaze360 dataset. Returns pitch (vert
|
||||
*MAE (Mean Absolute Error) in degrees on Gaze360 test set - lower is better
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on Gaze360 (indoor/outdoor scenes with diverse head poses)
|
||||
**Dataset**: Trained on [Gaze360](datasets.md#gaze360) (indoor/outdoor scenes with diverse head poses)
|
||||
|
||||
**Training**: 200 epochs with classification-based approach (binned angles)
|
||||
|
||||
@@ -257,6 +295,33 @@ Gaze direction prediction models trained on Gaze360 dataset. Returns pitch (vert
|
||||
|
||||
---
|
||||
|
||||
## Head Pose Estimation Models
|
||||
|
||||
### HeadPose Family
|
||||
|
||||
Head pose estimation models using 6D rotation representation. Trained on [300W-LP](datasets.md#300w-lp) dataset, evaluated on AFLW2000. Returns pitch, yaw, and roll angles in degrees.
|
||||
|
||||
| Model Name | Backbone | Size | MAE* |
|
||||
| -------------- | -------- | ------- | ----- |
|
||||
| `RESNET18` :material-check-circle: | ResNet18 | 43 MB | 5.22° |
|
||||
| `RESNET34` | ResNet34 | 82 MB | 5.07° |
|
||||
| `RESNET50` | ResNet50 | 91 MB | 4.83° |
|
||||
| `MOBILENET_V2` | MobileNetV2 | 9.6 MB | 5.72° |
|
||||
| `MOBILENET_V3_SMALL` | MobileNetV3-Small | 4.8 MB | 6.31° |
|
||||
| `MOBILENET_V3_LARGE` | MobileNetV3-Large | 16 MB | 5.58° |
|
||||
|
||||
*MAE (Mean Absolute Error) in degrees on AFLW2000 test set — lower is better
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on [300W-LP](datasets.md#300w-lp) (synthesized large-pose faces from 300W)
|
||||
|
||||
**Method**: 6D rotation representation (rotation matrix → Euler angles)
|
||||
|
||||
!!! note "Input Requirements"
|
||||
Requires face crop as input. Use face detection first to obtain bounding boxes.
|
||||
|
||||
---
|
||||
|
||||
## Face Parsing Models
|
||||
|
||||
### BiSeNet Family
|
||||
@@ -269,7 +334,7 @@ BiSeNet (Bilateral Segmentation Network) models for semantic face parsing. Segme
|
||||
| `RESNET34` | 24.1M | 89.2 MB | 19 |
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on CelebAMask-HQ
|
||||
**Dataset**: Trained on [CelebAMask-HQ](datasets.md#celebamask-hq)
|
||||
|
||||
**Architecture**: BiSeNet with ResNet backbone
|
||||
|
||||
@@ -279,13 +344,13 @@ BiSeNet (Bilateral Segmentation Network) models for semantic face parsing. Segme
|
||||
|
||||
| # | Class | # | Class | # | Class |
|
||||
|---|-------|---|-------|---|-------|
|
||||
| 1 | Background | 8 | Left Ear | 15 | Neck |
|
||||
| 2 | Skin | 9 | Right Ear | 16 | Neck Lace |
|
||||
| 3 | Left Eyebrow | 10 | Ear Ring | 17 | Cloth |
|
||||
| 4 | Right Eyebrow | 11 | Nose | 18 | Hair |
|
||||
| 5 | Left Eye | 12 | Mouth | 19 | Hat |
|
||||
| 6 | Right Eye | 13 | Upper Lip | | |
|
||||
| 7 | Eye Glasses | 14 | Lower Lip | | |
|
||||
| 0 | Background | 7 | Left Ear | 14 | Neck |
|
||||
| 1 | Skin | 8 | Right Ear | 15 | Neck Lace |
|
||||
| 2 | Left Eyebrow | 9 | Ear Ring | 16 | Cloth |
|
||||
| 3 | Right Eyebrow | 10 | Nose | 17 | Hair |
|
||||
| 4 | Left Eye | 11 | Mouth | 18 | Hat |
|
||||
| 5 | Right Eye | 12 | Upper Lip | | |
|
||||
| 6 | Eye Glasses | 13 | Lower Lip | | |
|
||||
|
||||
**Applications:**
|
||||
|
||||
@@ -300,6 +365,62 @@ BiSeNet (Bilateral Segmentation Network) models for semantic face parsing. Segme
|
||||
|
||||
---
|
||||
|
||||
### XSeg
|
||||
|
||||
XSeg from DeepFaceLab outputs masks for face regions. Requires 5-point landmarks for face alignment.
|
||||
|
||||
| Model Name | Size | Output |
|
||||
|------------|--------|--------|
|
||||
| `DEFAULT` | 67 MB | Mask [0, 1] |
|
||||
|
||||
!!! info "Model Details"
|
||||
**Origin**: DeepFaceLab
|
||||
|
||||
**Input**: NHWC format, normalized to [0, 1]
|
||||
|
||||
**Alignment**: Requires 5-point landmarks (not bbox crops)
|
||||
|
||||
**Applications:**
|
||||
|
||||
- Face region extraction
|
||||
- Face swapping pipelines
|
||||
- Occlusion handling
|
||||
|
||||
!!! note "Input Requirements"
|
||||
Requires 5-point facial landmarks. Use a face detector like RetinaFace to obtain landmarks first.
|
||||
|
||||
---
|
||||
|
||||
## Portrait Matting Models
|
||||
|
||||
### MODNet
|
||||
|
||||
MODNet (Real-Time Trimap-Free Portrait Matting) produces soft alpha mattes from full images without requiring a trimap. Uses MobileNetV2 backbone with low-resolution, high-resolution, and fusion branches.
|
||||
|
||||
| Model Name | Variant | Size | Use Case |
|
||||
| ---------- | ------- | ---- | -------- |
|
||||
| `PHOTOGRAPHIC` :material-check-circle: | High-quality | 25 MB | Portrait photos |
|
||||
| `WEBCAM` | Real-time | 25 MB | Webcam feeds |
|
||||
|
||||
!!! info "Model Details"
|
||||
**Paper**: [MODNet: Real-Time Trimap-Free Portrait Matting via Objective Decomposition](https://arxiv.org/abs/2011.11961) (AAAI 2022)
|
||||
|
||||
**Source**: [yakhyo/modnet](https://github.com/yakhyo/modnet) — ported weights and clean inference codebase
|
||||
|
||||
**Output**: Alpha matte `(H, W)` in `[0, 1]`
|
||||
|
||||
**Applications:**
|
||||
|
||||
- Background removal / replacement
|
||||
- Green screen compositing
|
||||
- Video conferencing virtual backgrounds
|
||||
- Portrait editing
|
||||
|
||||
!!! note "Input Requirements"
|
||||
Operates on full images (not face crops). No trimap or face detection required.
|
||||
|
||||
---
|
||||
|
||||
## Anti-Spoofing Models
|
||||
|
||||
### MiniFASNet Family
|
||||
@@ -323,10 +444,14 @@ Face anti-spoofing models for liveness detection. Detect if a face is real (live
|
||||
|
||||
Models are automatically downloaded and cached on first use.
|
||||
|
||||
- **Cache location**: `~/.uniface/models/`
|
||||
- **Cache location**: `~/.uniface/models/` (configurable via `set_cache_dir()` or `UNIFACE_CACHE_DIR` env var)
|
||||
- **Inspect cache path**: `get_cache_dir()` returns the resolved active path
|
||||
- **Verification**: Models are verified with SHA-256 checksums
|
||||
- **Concurrent download**: `download_models([...])` fetches multiple models in parallel
|
||||
- **Manual download**: Use `python tools/download_model.py` to pre-download models
|
||||
|
||||
See [Model Cache & Offline Use](concepts/model-cache-offline.md) for full details.
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
@@ -342,9 +467,13 @@ Models are automatically downloaded and cached on first use.
|
||||
- **AdaFace ONNX**: [yakhyo/adaface-onnx](https://github.com/yakhyo/adaface-onnx) - ONNX export and inference
|
||||
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
|
||||
- **Gaze Estimation Training**: [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation) - MobileGaze training code and pretrained weights
|
||||
- **Head Pose Estimation**: [yakhyo/head-pose-estimation](https://github.com/yakhyo/head-pose-estimation) - 6D rotation head pose estimation training and ONNX models
|
||||
- **Face Parsing Training**: [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing) - BiSeNet training code and pretrained weights
|
||||
- **Face Segmentation**: [yakhyo/face-segmentation](https://github.com/yakhyo/face-segmentation) - XSeg ONNX Inference
|
||||
- **Portrait Matting**: [yakhyo/modnet](https://github.com/yakhyo/modnet) - MODNet ported weights and inference (from [ZHKKKe/MODNet](https://github.com/ZHKKKe/MODNet))
|
||||
- **Face Anti-Spoofing**: [yakhyo/face-anti-spoofing](https://github.com/yakhyo/face-anti-spoofing) - MiniFASNet ONNX inference (weights from [minivision-ai/Silent-Face-Anti-Spoofing](https://github.com/minivision-ai/Silent-Face-Anti-Spoofing))
|
||||
- **FairFace**: [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx) - FairFace ONNX inference for race, gender, age prediction
|
||||
- **PIPNet**: [yakhyo/pipnet-onnx](https://github.com/yakhyo/pipnet-onnx) - PIPNet ONNX export and inference (from [jhb86253817/PIPNet](https://github.com/jhb86253817/PIPNet))
|
||||
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights
|
||||
|
||||
### Papers
|
||||
@@ -355,4 +484,6 @@ Models are automatically downloaded and cached on first use.
|
||||
- **AdaFace**: [AdaFace: Quality Adaptive Margin for Face Recognition](https://arxiv.org/abs/2204.00964)
|
||||
- **ArcFace**: [Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
|
||||
- **SphereFace**: [Deep Hypersphere Embedding for Face Recognition](https://arxiv.org/abs/1704.08063)
|
||||
- **MODNet**: [Real-Time Trimap-Free Portrait Matting via Objective Decomposition](https://arxiv.org/abs/2011.11961)
|
||||
- **BiSeNet**: [Bilateral Segmentation Network for Real-time Semantic Segmentation](https://arxiv.org/abs/1808.00897)
|
||||
- **PIPNet**: [Towards Efficient Facial Landmark Detection in the Wild](https://arxiv.org/abs/2003.03771)
|
||||
|
||||
@@ -2,6 +2,11 @@
|
||||
|
||||
Facial attribute analysis for age, gender, race, and emotion detection.
|
||||
|
||||
<figure markdown="span">
|
||||
{ width="100%" }
|
||||
<figcaption>Age and gender prediction with detection bounding boxes</figcaption>
|
||||
</figure>
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
@@ -21,7 +26,8 @@ Predicts exact age and binary gender.
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, AgeGender
|
||||
from uniface.attribute import AgeGender
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
age_gender = AgeGender()
|
||||
@@ -29,9 +35,10 @@ age_gender = AgeGender()
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
result = age_gender.predict(image, face.bbox)
|
||||
result = age_gender.predict(image, face)
|
||||
print(f"Gender: {result.sex}") # "Female" or "Male"
|
||||
print(f"Age: {result.age} years")
|
||||
# face.gender and face.age are also set automatically
|
||||
```
|
||||
|
||||
### Output
|
||||
@@ -54,7 +61,8 @@ Predicts gender, age group, and race with balanced demographics.
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, FairFace
|
||||
from uniface.attribute import FairFace
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
fairface = FairFace()
|
||||
@@ -62,10 +70,11 @@ fairface = FairFace()
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
result = fairface.predict(image, face.bbox)
|
||||
result = fairface.predict(image, face)
|
||||
print(f"Gender: {result.sex}")
|
||||
print(f"Age Group: {result.age_group}")
|
||||
print(f"Race: {result.race}")
|
||||
# face.gender, face.age_group, face.race are also set automatically
|
||||
```
|
||||
|
||||
### Output
|
||||
@@ -120,7 +129,7 @@ Predicts facial emotions. Requires PyTorch.
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.attribute import Emotion
|
||||
from uniface.constants import DDAMFNWeights
|
||||
|
||||
@@ -130,7 +139,7 @@ emotion = Emotion(model_name=DDAMFNWeights.AFFECNET7)
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
result = emotion.predict(image, face.landmarks)
|
||||
result = emotion.predict(image, face)
|
||||
print(f"Emotion: {result.emotion}")
|
||||
print(f"Confidence: {result.confidence:.2%}")
|
||||
```
|
||||
@@ -147,7 +156,7 @@ for face in faces:
|
||||
| Surprise |
|
||||
| Fear |
|
||||
| Disgust |
|
||||
| Anger |
|
||||
| Angry |
|
||||
|
||||
=== "8-Class (AFFECNET8)"
|
||||
|
||||
@@ -159,7 +168,7 @@ for face in faces:
|
||||
| Surprise |
|
||||
| Fear |
|
||||
| Disgust |
|
||||
| Anger |
|
||||
| Angry |
|
||||
| Contempt |
|
||||
|
||||
### Model Variants
|
||||
@@ -177,12 +186,29 @@ emotion = Emotion(model_name=DDAMFNWeights.AFFECNET8)
|
||||
|
||||
---
|
||||
|
||||
## Factory Function
|
||||
|
||||
Use `create_attribute_predictor()` for dynamic model selection:
|
||||
|
||||
```python
|
||||
from uniface import create_attribute_predictor
|
||||
|
||||
age_gender = create_attribute_predictor('age_gender')
|
||||
fairface = create_attribute_predictor('fairface')
|
||||
emotion = create_attribute_predictor('emotion')
|
||||
```
|
||||
|
||||
Available model names: `'age_gender'`, `'fairface'`, `'emotion'`.
|
||||
|
||||
---
|
||||
|
||||
## Combining Models
|
||||
|
||||
### Full Attribute Analysis
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, AgeGender, FairFace
|
||||
from uniface.attribute import AgeGender, FairFace
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
age_gender = AgeGender()
|
||||
@@ -192,10 +218,10 @@ faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
# Get exact age from AgeGender
|
||||
ag_result = age_gender.predict(image, face.bbox)
|
||||
ag_result = age_gender.predict(image, face)
|
||||
|
||||
# Get race from FairFace
|
||||
ff_result = fairface.predict(image, face.bbox)
|
||||
ff_result = fairface.predict(image, face)
|
||||
|
||||
print(f"Gender: {ag_result.sex}")
|
||||
print(f"Exact Age: {ag_result.age}")
|
||||
@@ -206,12 +232,13 @@ for face in faces:
|
||||
### Using FaceAnalyzer
|
||||
|
||||
```python
|
||||
from uniface import FaceAnalyzer
|
||||
from uniface.analyzer import FaceAnalyzer
|
||||
from uniface.attribute import AgeGender
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
analyzer = FaceAnalyzer(
|
||||
detect=True,
|
||||
recognize=False,
|
||||
attributes=True # Uses AgeGender
|
||||
RetinaFace(),
|
||||
attributes=[AgeGender()],
|
||||
)
|
||||
|
||||
faces = analyzer.analyze(image)
|
||||
@@ -253,7 +280,7 @@ def draw_attributes(image, face, result):
|
||||
|
||||
# Usage
|
||||
for face in faces:
|
||||
result = age_gender.predict(image, face.bbox)
|
||||
result = age_gender.predict(image, face)
|
||||
image = draw_attributes(image, face, result)
|
||||
|
||||
cv2.imwrite("attributes.jpg", image)
|
||||
|
||||
@@ -2,6 +2,11 @@
|
||||
|
||||
Face detection is the first step in any face analysis pipeline. UniFace provides four detection models.
|
||||
|
||||
<figure markdown="span">
|
||||
{ width="100%" }
|
||||
<figcaption>SCRFD detection with corner-style bounding boxes and 5-point landmarks</figcaption>
|
||||
</figure>
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
@@ -24,7 +29,7 @@ Single-shot face detector with multi-scale feature pyramid.
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
faces = detector.detect(image)
|
||||
@@ -38,7 +43,7 @@ for face in faces:
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Lightweight (mobile/edge)
|
||||
@@ -82,7 +87,7 @@ State-of-the-art detection with excellent accuracy-speed tradeoff.
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import SCRFD
|
||||
from uniface.detection import SCRFD
|
||||
|
||||
detector = SCRFD()
|
||||
faces = detector.detect(image)
|
||||
@@ -91,7 +96,7 @@ faces = detector.detect(image)
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface import SCRFD
|
||||
from uniface.detection import SCRFD
|
||||
from uniface.constants import SCRFDWeights
|
||||
|
||||
# Real-time (lightweight)
|
||||
@@ -127,7 +132,7 @@ YOLO-based detection optimized for faces.
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import YOLOv5Face
|
||||
from uniface.detection import YOLOv5Face
|
||||
|
||||
detector = YOLOv5Face()
|
||||
faces = detector.detect(image)
|
||||
@@ -136,7 +141,7 @@ faces = detector.detect(image)
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface import YOLOv5Face
|
||||
from uniface.detection import YOLOv5Face
|
||||
from uniface.constants import YOLOv5FaceWeights
|
||||
|
||||
# Lightweight
|
||||
@@ -179,7 +184,7 @@ Anchor-free detection with DFL (Distribution Focal Loss) for accurate bbox regre
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import YOLOv8Face
|
||||
from uniface.detection import YOLOv8Face
|
||||
|
||||
detector = YOLOv8Face()
|
||||
faces = detector.detect(image)
|
||||
@@ -188,7 +193,7 @@ faces = detector.detect(image)
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface import YOLOv8Face
|
||||
from uniface.detection import YOLOv8Face
|
||||
from uniface.constants import YOLOv8FaceWeights
|
||||
|
||||
# Lightweight
|
||||
@@ -225,7 +230,7 @@ detector = YOLOv8Face(
|
||||
Create detectors dynamically:
|
||||
|
||||
```python
|
||||
from uniface import create_detector
|
||||
from uniface.detection import create_detector
|
||||
|
||||
detector = create_detector('retinaface')
|
||||
# or
|
||||
@@ -238,22 +243,6 @@ detector = create_detector('yolov8face')
|
||||
|
||||
---
|
||||
|
||||
## High-Level API
|
||||
|
||||
One-line detection:
|
||||
|
||||
```python
|
||||
from uniface import detect_faces
|
||||
|
||||
# Using RetinaFace (default)
|
||||
faces = detect_faces(image, method='retinaface', confidence_threshold=0.5)
|
||||
|
||||
# Using YOLOv8-Face
|
||||
faces = detect_faces(image, method='yolov8face', confidence_threshold=0.5)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Output Format
|
||||
|
||||
All detectors return `list[Face]`:
|
||||
@@ -276,14 +265,12 @@ for face in faces:
|
||||
## Visualization
|
||||
|
||||
```python
|
||||
from uniface.visualization import draw_detections
|
||||
from uniface.draw import draw_detections
|
||||
|
||||
draw_detections(
|
||||
image=image,
|
||||
bboxes=[f.bbox for f in faces],
|
||||
scores=[f.confidence for f in faces],
|
||||
landmarks=[f.landmarks for f in faces],
|
||||
vis_threshold=0.6
|
||||
faces=faces,
|
||||
vis_threshold=0.6,
|
||||
)
|
||||
|
||||
cv2.imwrite("result.jpg", image)
|
||||
@@ -296,7 +283,7 @@ cv2.imwrite("result.jpg", image)
|
||||
Benchmark on your hardware:
|
||||
|
||||
```bash
|
||||
python tools/detection.py --source image.jpg --iterations 100
|
||||
python tools/detect.py --source image.jpg
|
||||
```
|
||||
|
||||
---
|
||||
@@ -304,6 +291,6 @@ python tools/detection.py --source image.jpg --iterations 100
|
||||
## See Also
|
||||
|
||||
- [Recognition Module](recognition.md) - Extract embeddings from detected faces
|
||||
- [Landmarks Module](landmarks.md) - Get 106-point landmarks
|
||||
- [Landmarks Module](landmarks.md) - Get 106 / 98 / 68-point dense landmarks
|
||||
- [Image Pipeline Recipe](../recipes/image-pipeline.md) - Complete detection workflow
|
||||
- [Concepts: Thresholds](../concepts/thresholds-calibration.md) - Tuning detection parameters
|
||||
|
||||
@@ -2,6 +2,11 @@
|
||||
|
||||
Gaze estimation predicts where a person is looking (pitch and yaw angles).
|
||||
|
||||
<figure markdown="span">
|
||||
{ width="100%" }
|
||||
<figcaption>Gaze direction arrows with pitch/yaw angle labels</figcaption>
|
||||
</figure>
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
@@ -23,7 +28,8 @@ Gaze estimation predicts where a person is looking (pitch and yaw angles).
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, MobileGaze
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.gaze import MobileGaze
|
||||
|
||||
detector = RetinaFace()
|
||||
gaze_estimator = MobileGaze()
|
||||
@@ -52,7 +58,7 @@ for face in faces:
|
||||
## Model Variants
|
||||
|
||||
```python
|
||||
from uniface import MobileGaze
|
||||
from uniface.gaze import MobileGaze
|
||||
from uniface.constants import GazeWeights
|
||||
|
||||
# Default (ResNet34, recommended)
|
||||
@@ -102,7 +108,7 @@ yaw = -90° ────┼──── yaw = +90°
|
||||
## Visualization
|
||||
|
||||
```python
|
||||
from uniface.visualization import draw_gaze
|
||||
from uniface.draw import draw_gaze
|
||||
|
||||
# Detect faces
|
||||
faces = detector.detect(image)
|
||||
@@ -154,8 +160,9 @@ def draw_gaze_custom(image, bbox, pitch, yaw, length=100, color=(0, 255, 0)):
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, MobileGaze
|
||||
from uniface.visualization import draw_gaze
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.gaze import MobileGaze
|
||||
from uniface.draw import draw_gaze
|
||||
|
||||
detector = RetinaFace()
|
||||
gaze_estimator = MobileGaze()
|
||||
@@ -256,7 +263,7 @@ print(f"Looking: {direction}")
|
||||
## Factory Function
|
||||
|
||||
```python
|
||||
from uniface import create_gaze_estimator
|
||||
from uniface.gaze import create_gaze_estimator
|
||||
|
||||
gaze = create_gaze_estimator() # Returns MobileGaze
|
||||
```
|
||||
@@ -265,6 +272,7 @@ gaze = create_gaze_estimator() # Returns MobileGaze
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Head Pose Estimation](headpose.md) - 3D head orientation
|
||||
- [Anti-Spoofing](spoofing.md) - Face liveness detection
|
||||
- [Privacy](privacy.md) - Face anonymization
|
||||
- [Video Recipe](../recipes/video-webcam.md) - Real-time processing
|
||||
|
||||
237
docs/modules/headpose.md
Normal file
@@ -0,0 +1,237 @@
|
||||
# Head Pose Estimation
|
||||
|
||||
Head pose estimation predicts the 3D orientation of a person's head (pitch, yaw, and roll angles).
|
||||
|
||||
<figure markdown="span">
|
||||
{ width="100%" }
|
||||
<figcaption>3D head pose visualization with pitch, yaw, and roll angles</figcaption>
|
||||
</figure>
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
|
||||
| Model | Backbone | Size | MAE* |
|
||||
|-------|----------|------|------|
|
||||
| **ResNet18** :material-check-circle: | ResNet18 | 43 MB | 5.22° |
|
||||
| ResNet34 | ResNet34 | 82 MB | 5.07° |
|
||||
| ResNet50 | ResNet50 | 91 MB | 4.83° |
|
||||
| MobileNetV2 | MobileNetV2 | 9.6 MB | 5.72° |
|
||||
| MobileNetV3-Small | MobileNetV3 | 4.8 MB | 6.31° |
|
||||
| MobileNetV3-Large | MobileNetV3 | 16 MB | 5.58° |
|
||||
|
||||
*MAE = Mean Absolute Error on AFLW2000 test set (lower is better)
|
||||
|
||||
---
|
||||
|
||||
## Basic Usage
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.headpose import HeadPose
|
||||
|
||||
detector = RetinaFace()
|
||||
head_pose = HeadPose()
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
# Crop face
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size > 0:
|
||||
# Estimate head pose
|
||||
result = head_pose.estimate(face_crop)
|
||||
print(f"Pitch: {result.pitch:.1f}°, Yaw: {result.yaw:.1f}°, Roll: {result.roll:.1f}°")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Variants
|
||||
|
||||
```python
|
||||
from uniface.headpose import HeadPose
|
||||
from uniface.constants import HeadPoseWeights
|
||||
|
||||
# Default (ResNet18, recommended balance of speed and accuracy)
|
||||
hp = HeadPose()
|
||||
|
||||
# Lightweight for mobile/edge
|
||||
hp = HeadPose(model_name=HeadPoseWeights.MOBILENET_V3_SMALL)
|
||||
|
||||
# Higher accuracy
|
||||
hp = HeadPose(model_name=HeadPoseWeights.RESNET50)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Output Format
|
||||
|
||||
```python
|
||||
result = head_pose.estimate(face_crop)
|
||||
|
||||
# HeadPoseResult dataclass
|
||||
result.pitch # Rotation around X-axis in degrees
|
||||
result.yaw # Rotation around Y-axis in degrees
|
||||
result.roll # Rotation around Z-axis in degrees
|
||||
```
|
||||
|
||||
### Angle Convention
|
||||
|
||||
```
|
||||
pitch > 0 (looking down)
|
||||
│
|
||||
│
|
||||
yaw < 0 ─────┼───── yaw > 0
|
||||
(looking left) │ (looking right)
|
||||
│
|
||||
pitch < 0 (looking up)
|
||||
|
||||
roll > 0 = clockwise tilt
|
||||
roll < 0 = counter-clockwise tilt
|
||||
```
|
||||
|
||||
- **Pitch**: Rotation around X-axis (positive = looking down)
|
||||
- **Yaw**: Rotation around Y-axis (positive = looking right)
|
||||
- **Roll**: Rotation around Z-axis (positive = tilting clockwise)
|
||||
|
||||
---
|
||||
|
||||
## Visualization
|
||||
|
||||
### 3D Cube (default)
|
||||
|
||||
The default visualization draws a wireframe cube oriented to match the head pose.
|
||||
|
||||
```python
|
||||
from uniface.draw import draw_head_pose
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size > 0:
|
||||
result = head_pose.estimate(face_crop)
|
||||
|
||||
# Draw cube on image (default)
|
||||
draw_head_pose(image, face.bbox, result.pitch, result.yaw, result.roll)
|
||||
|
||||
cv2.imwrite("headpose_output.jpg", image)
|
||||
```
|
||||
|
||||
### Axis Visualization
|
||||
|
||||
```python
|
||||
from uniface.draw import draw_head_pose
|
||||
|
||||
# X/Y/Z coordinate axes
|
||||
draw_head_pose(image, face.bbox, result.pitch, result.yaw, result.roll, draw_type='axis')
|
||||
```
|
||||
|
||||
### Low-Level Drawing Functions
|
||||
|
||||
```python
|
||||
from uniface.draw import draw_head_pose_cube, draw_head_pose_axis
|
||||
|
||||
# Draw cube directly
|
||||
draw_head_pose_cube(image, yaw=10.0, pitch=-5.0, roll=2.0, bbox=[100, 100, 250, 280])
|
||||
|
||||
# Draw axes directly
|
||||
draw_head_pose_axis(image, yaw=10.0, pitch=-5.0, roll=2.0, bbox=[100, 100, 250, 280])
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Real-Time Head Pose Tracking
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.headpose import HeadPose
|
||||
from uniface.draw import draw_head_pose
|
||||
|
||||
detector = RetinaFace()
|
||||
head_pose = HeadPose()
|
||||
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
face_crop = frame[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size > 0:
|
||||
result = head_pose.estimate(face_crop)
|
||||
draw_head_pose(frame, face.bbox, result.pitch, result.yaw, result.roll)
|
||||
|
||||
cv2.imshow("Head Pose Estimation", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Use Cases
|
||||
|
||||
### Driver Drowsiness Detection
|
||||
|
||||
```python
|
||||
def is_head_drooping(result, pitch_threshold=-15):
|
||||
"""Check if the head is drooping (looking down significantly)."""
|
||||
return result.pitch < pitch_threshold
|
||||
|
||||
result = head_pose.estimate(face_crop)
|
||||
if is_head_drooping(result):
|
||||
print("Warning: Head drooping detected")
|
||||
```
|
||||
|
||||
### Attention Monitoring
|
||||
|
||||
```python
|
||||
def is_facing_forward(result, threshold=20):
|
||||
"""Check if the person is facing roughly forward."""
|
||||
return (
|
||||
abs(result.pitch) < threshold
|
||||
and abs(result.yaw) < threshold
|
||||
and abs(result.roll) < threshold
|
||||
)
|
||||
|
||||
result = head_pose.estimate(face_crop)
|
||||
if is_facing_forward(result):
|
||||
print("Facing forward")
|
||||
else:
|
||||
print("Looking away")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Factory Function
|
||||
|
||||
```python
|
||||
from uniface.headpose import create_head_pose_estimator
|
||||
|
||||
hp = create_head_pose_estimator() # Returns HeadPose
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Gaze Estimation](gaze.md) - Eye gaze direction
|
||||
- [Anti-Spoofing](spoofing.md) - Face liveness detection
|
||||
- [Video Recipe](../recipes/video-webcam.md) - Real-time processing
|
||||
@@ -2,6 +2,11 @@
|
||||
|
||||
Facial landmark detection provides precise localization of facial features.
|
||||
|
||||
<figure markdown="span">
|
||||
{ width="50%" }
|
||||
<figcaption>106-point facial landmark localization</figcaption>
|
||||
</figure>
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
@@ -9,6 +14,8 @@ Facial landmark detection provides precise localization of facial features.
|
||||
| Model | Points | Size |
|
||||
|-------|--------|------|
|
||||
| **Landmark106** | 106 | 14 MB |
|
||||
| **PIPNet (WFLW-98)** | 98 | 47 MB |
|
||||
| **PIPNet (300W+CelebA-68)** | 68 | 46 MB |
|
||||
|
||||
!!! info "5-Point Landmarks"
|
||||
Basic 5-point landmarks are included with all detection models (RetinaFace, SCRFD, YOLOv5-Face, YOLOv8-Face).
|
||||
@@ -20,7 +27,8 @@ Facial landmark detection provides precise localization of facial features.
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, Landmark106
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
detector = RetinaFace()
|
||||
landmarker = Landmark106()
|
||||
@@ -73,12 +81,54 @@ mouth = landmarks[87:106]
|
||||
|
||||
---
|
||||
|
||||
## PIPNet (98 / 68 points)
|
||||
|
||||
PIPNet (Pixel-in-Pixel Net) is a high-accuracy facial landmark detector. UniFace ships
|
||||
two ONNX variants that share a ResNet-18 backbone and 256×256 input — the only difference
|
||||
is the number of points and the dataset they were trained on.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.landmark import PIPNet
|
||||
|
||||
detector = RetinaFace()
|
||||
landmarker = PIPNet() # Default: 98 points (WFLW)
|
||||
|
||||
faces = detector.detect(image)
|
||||
if faces:
|
||||
landmarks = landmarker.get_landmarks(image, faces[0].bbox)
|
||||
print(f"Landmarks shape: {landmarks.shape}") # (98, 2)
|
||||
```
|
||||
|
||||
### 68-Point Variant (300W+CelebA, GSSL)
|
||||
|
||||
```python
|
||||
from uniface.constants import PIPNetWeights
|
||||
from uniface.landmark import PIPNet
|
||||
|
||||
landmarker = PIPNet(model_name=PIPNetWeights.DW300_CELEBA_68)
|
||||
landmarks = landmarker.get_landmarks(image, face.bbox)
|
||||
print(landmarks.shape) # (68, 2)
|
||||
```
|
||||
|
||||
### Notes
|
||||
|
||||
- The number of landmarks is read from the ONNX output and the matching meanface
|
||||
table is selected automatically — there is no `num_lms=` argument.
|
||||
- PIPNet uses an asymmetric crop around the bbox (+10% left / right / bottom,
|
||||
−10% top) and ImageNet normalization. This is handled internally.
|
||||
- Output landmarks are in original-image pixel coordinates as `float32`.
|
||||
|
||||
---
|
||||
|
||||
## 5-Point Landmarks (Detection)
|
||||
|
||||
All detection models provide 5-point landmarks:
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
faces = detector.detect(image)
|
||||
@@ -152,7 +202,7 @@ def draw_landmarks_with_connections(image, landmarks):
|
||||
### Face Alignment
|
||||
|
||||
```python
|
||||
from uniface import face_alignment
|
||||
from uniface.face_utils import face_alignment
|
||||
|
||||
# Align face using 5-point landmarks
|
||||
aligned = face_alignment(image, faces[0].landmarks)
|
||||
@@ -236,9 +286,17 @@ def estimate_head_pose(landmarks, image_shape):
|
||||
## Factory Function
|
||||
|
||||
```python
|
||||
from uniface import create_landmarker
|
||||
from uniface.constants import PIPNetWeights
|
||||
from uniface.landmark import create_landmarker
|
||||
|
||||
landmarker = create_landmarker() # Returns Landmark106
|
||||
# Default: 106-point InsightFace model
|
||||
landmarker = create_landmarker()
|
||||
|
||||
# 98-point PIPNet (WFLW)
|
||||
landmarker = create_landmarker('pipnet')
|
||||
|
||||
# 68-point PIPNet (300W+CelebA)
|
||||
landmarker = create_landmarker('pipnet', model_name=PIPNetWeights.DW300_CELEBA_68)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
157
docs/modules/matting.md
Normal file
@@ -0,0 +1,157 @@
|
||||
# Portrait Matting
|
||||
|
||||
Portrait matting produces a soft alpha matte separating the foreground (person) from the background — no trimap needed.
|
||||
|
||||
<figure markdown="span">
|
||||
{ width="100%" }
|
||||
<figcaption>MODNet: Input → Matte → Green Screen</figcaption>
|
||||
</figure>
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
|
||||
| Model | Variant | Size | Use Case |
|
||||
|-------|---------|------|----------|
|
||||
| **MODNet Photographic** :material-check-circle: | PHOTOGRAPHIC | 25 MB | High-quality portrait photos |
|
||||
| MODNet Webcam | WEBCAM | 25 MB | Real-time webcam feeds |
|
||||
|
||||
---
|
||||
|
||||
## Basic Usage
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.matting import MODNet
|
||||
|
||||
matting = MODNet()
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
matte = matting.predict(image)
|
||||
|
||||
print(f"Matte shape: {matte.shape}") # (H, W)
|
||||
print(f"Matte dtype: {matte.dtype}") # float32
|
||||
print(f"Matte range: [{matte.min():.2f}, {matte.max():.2f}]") # [0, 1]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Variants
|
||||
|
||||
```python
|
||||
from uniface.matting import MODNet
|
||||
from uniface.constants import MODNetWeights
|
||||
|
||||
# Photographic (default) — best for photos
|
||||
matting = MODNet()
|
||||
|
||||
# Webcam — optimized for real-time
|
||||
matting = MODNet(model_name=MODNetWeights.WEBCAM)
|
||||
|
||||
# Custom input size
|
||||
matting = MODNet(input_size=256)
|
||||
```
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `model_name` | `PHOTOGRAPHIC` | Model variant to load |
|
||||
| `input_size` | `512` | Target shorter-side size for preprocessing |
|
||||
| `providers` | `None` | ONNX Runtime execution providers |
|
||||
|
||||
---
|
||||
|
||||
## Applications
|
||||
|
||||
### Transparent Background (RGBA)
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
matting = MODNet()
|
||||
image = cv2.imread("photo.jpg")
|
||||
matte = matting.predict(image)
|
||||
|
||||
rgba = cv2.cvtColor(image, cv2.COLOR_BGR2BGRA)
|
||||
rgba[:, :, 3] = (matte * 255).astype(np.uint8)
|
||||
cv2.imwrite("transparent.png", rgba)
|
||||
```
|
||||
|
||||
### Green Screen
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
matte_3ch = matte[:, :, np.newaxis]
|
||||
bg = np.full_like(image, (0, 177, 64), dtype=np.uint8)
|
||||
green = (image * matte_3ch + bg * (1 - matte_3ch)).astype(np.uint8)
|
||||
cv2.imwrite("green_screen.jpg", green)
|
||||
```
|
||||
|
||||
### Custom Background
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
background = cv2.imread("beach.jpg")
|
||||
background = cv2.resize(background, (image.shape[1], image.shape[0]))
|
||||
|
||||
matte_3ch = matte[:, :, np.newaxis]
|
||||
result = (image * matte_3ch + background * (1 - matte_3ch)).astype(np.uint8)
|
||||
cv2.imwrite("custom_bg.jpg", result)
|
||||
```
|
||||
|
||||
### Webcam Matting
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface.matting import MODNet
|
||||
|
||||
matting = MODNet(model_name="modnet_webcam")
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
matte = matting.predict(frame)
|
||||
matte_3ch = matte[:, :, np.newaxis]
|
||||
bg = np.full_like(frame, (0, 177, 64), dtype=np.uint8)
|
||||
result = (frame * matte_3ch + bg * (1 - matte_3ch)).astype(np.uint8)
|
||||
|
||||
cv2.imshow("Matting", np.hstack([frame, result]))
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Factory Function
|
||||
|
||||
```python
|
||||
from uniface.matting import create_matting_model
|
||||
from uniface.constants import MODNetWeights
|
||||
|
||||
# Default (Photographic)
|
||||
matting = create_matting_model()
|
||||
|
||||
# With enum
|
||||
matting = create_matting_model(MODNetWeights.WEBCAM)
|
||||
|
||||
# With string
|
||||
matting = create_matting_model("modnet_webcam")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Parsing](parsing.md) - Face semantic segmentation
|
||||
- [Privacy](privacy.md) - Face anonymization
|
||||
- [Detection](detection.md) - Face detection
|
||||
@@ -1,15 +1,26 @@
|
||||
# Parsing
|
||||
|
||||
Face parsing segments faces into semantic components (skin, eyes, nose, mouth, hair, etc.).
|
||||
Face parsing segments faces into semantic components or face regions.
|
||||
|
||||
<figure markdown="span">
|
||||
{ width="80%" }
|
||||
<figcaption>BiSeNet face parsing with 19 semantic component classes</figcaption>
|
||||
</figure>
|
||||
|
||||
<figure markdown="span">
|
||||
{ width="80%" }
|
||||
<figcaption>XSeg face region segmentation mask</figcaption>
|
||||
</figure>
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
|
||||
| Model | Backbone | Size | Classes |
|
||||
|-------|----------|------|---------|
|
||||
| **BiSeNet ResNet18** :material-check-circle: | ResNet18 | 51 MB | 19 |
|
||||
| BiSeNet ResNet34 | ResNet34 | 89 MB | 19 |
|
||||
| Model | Backbone | Size | Output |
|
||||
|-------|----------|------|--------|
|
||||
| **BiSeNet ResNet18** :material-check-circle: | ResNet18 | 51 MB | 19 classes |
|
||||
| BiSeNet ResNet34 | ResNet34 | 89 MB | 19 classes |
|
||||
| XSeg | - | 67 MB | Mask |
|
||||
|
||||
---
|
||||
|
||||
@@ -18,7 +29,7 @@ Face parsing segments faces into semantic components (skin, eyes, nose, mouth, h
|
||||
```python
|
||||
import cv2
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.visualization import vis_parsing_maps
|
||||
from uniface.draw import vis_parsing_maps
|
||||
|
||||
# Initialize parser
|
||||
parser = BiSeNet()
|
||||
@@ -45,16 +56,16 @@ cv2.imwrite("parsed.jpg", vis_bgr)
|
||||
|
||||
| ID | Class | ID | Class |
|
||||
|----|-------|----|-------|
|
||||
| 0 | Background | 10 | Ear Ring |
|
||||
| 1 | Skin | 11 | Nose |
|
||||
| 2 | Left Eyebrow | 12 | Mouth |
|
||||
| 3 | Right Eyebrow | 13 | Upper Lip |
|
||||
| 4 | Left Eye | 14 | Lower Lip |
|
||||
| 5 | Right Eye | 15 | Neck |
|
||||
| 6 | Eye Glasses | 16 | Neck Lace |
|
||||
| 7 | Left Ear | 17 | Cloth |
|
||||
| 8 | Right Ear | 18 | Hair |
|
||||
| 9 | Hat | | |
|
||||
| 0 | Background | 10 | Nose |
|
||||
| 1 | Skin | 11 | Mouth |
|
||||
| 2 | Left Eyebrow | 12 | Upper Lip |
|
||||
| 3 | Right Eyebrow | 13 | Lower Lip |
|
||||
| 4 | Left Eye | 14 | Neck |
|
||||
| 5 | Right Eye | 15 | Necklace |
|
||||
| 6 | Eyeglasses | 16 | Cloth |
|
||||
| 7 | Left Ear | 17 | Hair |
|
||||
| 8 | Right Ear | 18 | Hat |
|
||||
| 9 | Earring | | |
|
||||
|
||||
---
|
||||
|
||||
@@ -84,9 +95,9 @@ parser = BiSeNet(model_name=ParsingWeights.RESNET34)
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.visualization import vis_parsing_maps
|
||||
from uniface.draw import vis_parsing_maps
|
||||
|
||||
detector = RetinaFace()
|
||||
parser = BiSeNet()
|
||||
@@ -125,7 +136,7 @@ mask = parser.parse(face_image)
|
||||
|
||||
# Extract specific component
|
||||
SKIN = 1
|
||||
HAIR = 18
|
||||
HAIR = 17
|
||||
LEFT_EYE = 4
|
||||
RIGHT_EYE = 5
|
||||
|
||||
@@ -148,10 +159,10 @@ mask = parser.parse(face_image)
|
||||
|
||||
component_names = {
|
||||
0: 'Background', 1: 'Skin', 2: 'L-Eyebrow', 3: 'R-Eyebrow',
|
||||
4: 'L-Eye', 5: 'R-Eye', 6: 'Glasses', 7: 'L-Ear', 8: 'R-Ear',
|
||||
9: 'Hat', 10: 'Earring', 11: 'Nose', 12: 'Mouth',
|
||||
13: 'U-Lip', 14: 'L-Lip', 15: 'Neck', 16: 'Necklace',
|
||||
17: 'Cloth', 18: 'Hair'
|
||||
4: 'L-Eye', 5: 'R-Eye', 6: 'Eyeglasses', 7: 'L-Ear', 8: 'R-Ear',
|
||||
9: 'Earring', 10: 'Nose', 11: 'Mouth',
|
||||
12: 'U-Lip', 13: 'L-Lip', 14: 'Neck', 15: 'Necklace',
|
||||
16: 'Cloth', 17: 'Hair', 18: 'Hat'
|
||||
}
|
||||
|
||||
for class_id in np.unique(mask):
|
||||
@@ -176,23 +187,19 @@ def apply_lip_color(image, mask, color=(180, 50, 50)):
|
||||
"""Apply lip color using parsing mask."""
|
||||
result = image.copy()
|
||||
|
||||
# Get lip mask (upper + lower lip)
|
||||
lip_mask = ((mask == 13) | (mask == 14)).astype(np.uint8)
|
||||
# Get lip mask (upper lip=12, lower lip=13)
|
||||
lip_mask = ((mask == 12) | (mask == 13)).astype(np.uint8)
|
||||
|
||||
# Create color overlay
|
||||
overlay = np.zeros_like(image)
|
||||
overlay[:] = color
|
||||
|
||||
# Blend with original
|
||||
lip_region = cv2.bitwise_and(overlay, overlay, mask=lip_mask)
|
||||
non_lip = cv2.bitwise_and(result, result, mask=1 - lip_mask)
|
||||
|
||||
# Combine with alpha blending
|
||||
# Alpha blend lip region
|
||||
alpha = 0.4
|
||||
result = cv2.addWeighted(result, 1 - alpha * lip_mask[:,:,np.newaxis] / 255,
|
||||
lip_region, alpha, 0)
|
||||
mask_3ch = lip_mask[:, :, np.newaxis]
|
||||
result = np.where(mask_3ch, (image * (1 - alpha) + overlay * alpha).astype(np.uint8), result)
|
||||
|
||||
return result.astype(np.uint8)
|
||||
return result
|
||||
```
|
||||
|
||||
### Background Replacement
|
||||
@@ -218,7 +225,7 @@ def replace_background(image, mask, background):
|
||||
```python
|
||||
def get_hair_mask(mask):
|
||||
"""Extract clean hair mask."""
|
||||
hair_mask = (mask == 18).astype(np.uint8) * 255
|
||||
hair_mask = (mask == 17).astype(np.uint8) * 255
|
||||
|
||||
# Clean up with morphological operations
|
||||
kernel = np.ones((5, 5), np.uint8)
|
||||
@@ -233,7 +240,7 @@ def get_hair_mask(mask):
|
||||
## Visualization Options
|
||||
|
||||
```python
|
||||
from uniface.visualization import vis_parsing_maps
|
||||
from uniface.draw import vis_parsing_maps
|
||||
|
||||
# Default visualization
|
||||
vis_result = vis_parsing_maps(face_rgb, mask)
|
||||
@@ -248,12 +255,83 @@ vis_result = vis_parsing_maps(
|
||||
|
||||
---
|
||||
|
||||
## XSeg
|
||||
|
||||
XSeg outputs a mask for face regions. Unlike BiSeNet which works on bbox crops, XSeg requires 5-point landmarks for face alignment.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.parsing import XSeg
|
||||
|
||||
detector = RetinaFace()
|
||||
parser = XSeg()
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
if face.landmarks is not None:
|
||||
mask = parser.parse(image, landmarks=face.landmarks)
|
||||
print(f"Mask shape: {mask.shape}") # (H, W), values in [0, 1]
|
||||
```
|
||||
|
||||
### Parameters
|
||||
|
||||
```python
|
||||
from uniface.parsing import XSeg
|
||||
|
||||
# Default settings
|
||||
parser = XSeg()
|
||||
|
||||
# Custom settings
|
||||
parser = XSeg(
|
||||
align_size=256, # Face alignment size
|
||||
blur_sigma=5, # Gaussian blur for smoothing (0 = raw)
|
||||
)
|
||||
```
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `align_size` | 256 | Face alignment output size |
|
||||
| `blur_sigma` | 0 | Mask smoothing (0 = no blur) |
|
||||
|
||||
### Methods
|
||||
|
||||
```python
|
||||
# Full pipeline: align -> segment -> warp back to original space
|
||||
mask = parser.parse(image, landmarks=landmarks)
|
||||
|
||||
# For pre-aligned face crops
|
||||
mask = parser.parse_aligned(face_crop)
|
||||
|
||||
# Get mask + crop + inverse matrix for custom warping
|
||||
mask, face_crop, inverse_matrix = parser.parse_with_inverse(image, landmarks)
|
||||
```
|
||||
|
||||
### BiSeNet vs XSeg
|
||||
|
||||
| Feature | BiSeNet | XSeg |
|
||||
|---------|---------|------|
|
||||
| Output | 19 class labels | Mask [0, 1] |
|
||||
| Input | Bbox crop | Requires landmarks |
|
||||
| Use case | Facial components | Face region extraction |
|
||||
|
||||
---
|
||||
|
||||
## Factory Function
|
||||
|
||||
```python
|
||||
from uniface import create_face_parser
|
||||
from uniface.parsing import create_face_parser
|
||||
from uniface.constants import ParsingWeights, XSegWeights
|
||||
|
||||
parser = create_face_parser() # Returns BiSeNet
|
||||
# BiSeNet (default)
|
||||
parser = create_face_parser()
|
||||
|
||||
# XSeg
|
||||
parser = create_face_parser(XSegWeights.DEFAULT)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
@@ -2,6 +2,11 @@
|
||||
|
||||
Face anonymization protects privacy by blurring or obscuring faces in images and videos.
|
||||
|
||||
<figure markdown="span">
|
||||
{ width="100%" }
|
||||
<figcaption>Five anonymization methods: pixelate, gaussian, blackout, elliptical, and median</figcaption>
|
||||
</figure>
|
||||
|
||||
---
|
||||
|
||||
## Available Methods
|
||||
@@ -18,25 +23,8 @@ Face anonymization protects privacy by blurring or obscuring faces in images and
|
||||
|
||||
## Quick Start
|
||||
|
||||
### One-Line Anonymization
|
||||
|
||||
```python
|
||||
from uniface.privacy import anonymize_faces
|
||||
import cv2
|
||||
|
||||
image = cv2.imread("group_photo.jpg")
|
||||
anonymized = anonymize_faces(image, method='pixelate')
|
||||
cv2.imwrite("anonymized.jpg", anonymized)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## BlurFace Class
|
||||
|
||||
For more control, use the `BlurFace` class:
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
import cv2
|
||||
|
||||
@@ -59,12 +47,12 @@ cv2.imwrite("anonymized.jpg", anonymized)
|
||||
Blocky pixelation effect (common in news media):
|
||||
|
||||
```python
|
||||
blurrer = BlurFace(method='pixelate', pixel_blocks=10)
|
||||
blurrer = BlurFace(method='pixelate', pixel_blocks=15)
|
||||
```
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `pixel_blocks` | 10 | Number of blocks (lower = more pixelated) |
|
||||
| `pixel_blocks` | 15 | Number of blocks (lower = more pixelated) |
|
||||
|
||||
### Gaussian
|
||||
|
||||
@@ -137,7 +125,7 @@ result = blurrer.anonymize(image, faces, inplace=True)
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
detector = RetinaFace()
|
||||
@@ -166,7 +154,7 @@ cv2.destroyAllWindows()
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
detector = RetinaFace()
|
||||
@@ -238,7 +226,7 @@ def anonymize_low_confidence(image, faces, blurrer, confidence_threshold=0.8):
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
detector = RetinaFace()
|
||||
@@ -259,13 +247,13 @@ for method in methods:
|
||||
|
||||
```bash
|
||||
# Anonymize image with pixelation
|
||||
python tools/face_anonymize.py --source photo.jpg
|
||||
python tools/anonymize.py --source photo.jpg
|
||||
|
||||
# Real-time webcam
|
||||
python tools/face_anonymize.py --source 0 --method gaussian
|
||||
python tools/anonymize.py --source 0 --method gaussian
|
||||
|
||||
# Custom blur strength
|
||||
python tools/face_anonymize.py --source photo.jpg --method gaussian --blur-strength 5.0
|
||||
python tools/anonymize.py --source photo.jpg --method gaussian --blur-strength 5.0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
@@ -2,6 +2,11 @@
|
||||
|
||||
Face recognition extracts embeddings for identity verification and face search.
|
||||
|
||||
<figure markdown="span">
|
||||
{ width="80%" }
|
||||
<figcaption>Pairwise face verification with cosine similarity scores</figcaption>
|
||||
</figure>
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
@@ -10,6 +15,7 @@ Face recognition extracts embeddings for identity verification and face search.
|
||||
|-------|----------|------|---------------|
|
||||
| **AdaFace** | IR-18/IR-101 | 92-249 MB | 512 |
|
||||
| **ArcFace** | MobileNet/ResNet | 8-166 MB | 512 |
|
||||
| **EdgeFace** | EdgeNeXt/LoRA | 5-70 MB | 512 |
|
||||
| **MobileFace** | MobileNet V2/V3 | 1-10 MB | 512 |
|
||||
| **SphereFace** | Sphere20/36 | 50-92 MB | 512 |
|
||||
|
||||
@@ -22,7 +28,8 @@ Face recognition using adaptive margin based on image quality.
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, AdaFace
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import AdaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = AdaFace()
|
||||
@@ -39,7 +46,7 @@ if faces:
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface import AdaFace
|
||||
from uniface.recognition import AdaFace
|
||||
from uniface.constants import AdaFaceWeights
|
||||
|
||||
# Lightweight (default)
|
||||
@@ -69,7 +76,8 @@ Face recognition using additive angular margin loss.
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, ArcFace
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
@@ -86,7 +94,7 @@ if faces:
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface import ArcFace
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.constants import ArcFaceWeights
|
||||
|
||||
# Lightweight (default)
|
||||
@@ -111,6 +119,64 @@ recognizer = ArcFace(providers=['CPUExecutionProvider'])
|
||||
|
||||
---
|
||||
|
||||
## EdgeFace
|
||||
|
||||
Efficient face recognition model designed for edge devices, using an EdgeNeXt backbone with optional LoRA low-rank compression. Winner of the compact-model track of the EFaR 2023 competition (IJCB).
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import EdgeFace
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = EdgeFace()
|
||||
|
||||
# Detect face
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Extract embedding
|
||||
if faces:
|
||||
embedding = recognizer.get_normalized_embedding(image, faces[0].landmarks)
|
||||
print(f"Embedding shape: {embedding.shape}") # (512,)
|
||||
```
|
||||
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface.recognition import EdgeFace
|
||||
from uniface.constants import EdgeFaceWeights
|
||||
|
||||
# Ultra-compact (default)
|
||||
recognizer = EdgeFace(model_name=EdgeFaceWeights.XXS)
|
||||
|
||||
# Compact with LoRA
|
||||
recognizer = EdgeFace(model_name=EdgeFaceWeights.XS_GAMMA_06)
|
||||
|
||||
# Small with LoRA
|
||||
recognizer = EdgeFace(model_name=EdgeFaceWeights.S_GAMMA_05)
|
||||
|
||||
# Full-size
|
||||
recognizer = EdgeFace(model_name=EdgeFaceWeights.BASE)
|
||||
|
||||
# Force CPU execution
|
||||
recognizer = EdgeFace(providers=['CPUExecutionProvider'])
|
||||
```
|
||||
|
||||
| Variant | Params | MFLOPs | Size | LFW | CALFW | CPLFW | CFP-FP | AgeDB-30 |
|
||||
|---------|--------|--------|------|-----|-------|-------|--------|----------|
|
||||
| **XXS** :material-check-circle: | 1.24M | 94 | ~5 MB | 99.57% | 94.83% | 90.27% | 93.63% | 94.92% |
|
||||
| XS_GAMMA_06 | 1.77M | 154 | ~7 MB | 99.73% | 95.28% | 91.58% | 94.71% | 96.08% |
|
||||
| S_GAMMA_05 | 3.65M | 306 | ~14 MB | 99.78% | 95.55% | 92.48% | 95.74% | 97.03% |
|
||||
| BASE | 18.2M | 1399 | ~70 MB | 99.83% | 96.07% | 93.75% | 97.01% | 97.60% |
|
||||
|
||||
!!! info "Reference"
|
||||
**Paper**: [EdgeFace: Efficient Face Recognition Model for Edge Devices](https://arxiv.org/abs/2307.01838v2) (IEEE T-BIOM 2024)
|
||||
|
||||
**Source**: [github.com/otroshi/edgeface](https://github.com/otroshi/edgeface)
|
||||
|
||||
---
|
||||
|
||||
## MobileFace
|
||||
|
||||
Lightweight face recognition models with MobileNet backbones.
|
||||
@@ -118,7 +184,7 @@ Lightweight face recognition models with MobileNet backbones.
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import MobileFace
|
||||
from uniface.recognition import MobileFace
|
||||
|
||||
recognizer = MobileFace()
|
||||
embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
@@ -127,7 +193,7 @@ embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface import MobileFace
|
||||
from uniface.recognition import MobileFace
|
||||
from uniface.constants import MobileFaceWeights
|
||||
|
||||
# Ultra-lightweight
|
||||
@@ -156,7 +222,7 @@ Face recognition using angular softmax loss (A-Softmax).
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import SphereFace
|
||||
from uniface.recognition import SphereFace
|
||||
from uniface.constants import SphereFaceWeights
|
||||
|
||||
recognizer = SphereFace(model_name=SphereFaceWeights.SPHERE20)
|
||||
@@ -175,7 +241,7 @@ embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
### Compute Similarity
|
||||
|
||||
```python
|
||||
from uniface import compute_similarity
|
||||
from uniface.face_utils import compute_similarity
|
||||
import numpy as np
|
||||
|
||||
# Extract embeddings
|
||||
@@ -211,7 +277,7 @@ Recognition models require aligned faces. UniFace handles this internally:
|
||||
embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
|
||||
# Or manually align
|
||||
from uniface import face_alignment
|
||||
from uniface.face_utils import face_alignment
|
||||
|
||||
aligned_face = face_alignment(image, landmarks)
|
||||
# Returns: 112x112 aligned face image
|
||||
@@ -223,7 +289,8 @@ aligned_face = face_alignment(image, landmarks)
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, ArcFace
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
@@ -282,11 +349,12 @@ else:
|
||||
## Factory Function
|
||||
|
||||
```python
|
||||
from uniface import create_recognizer
|
||||
from uniface.recognition import create_recognizer
|
||||
|
||||
# Available methods: 'arcface', 'adaface', 'mobileface', 'sphereface'
|
||||
# Available methods: 'arcface', 'adaface', 'edgeface', 'mobileface', 'sphereface'
|
||||
recognizer = create_recognizer('arcface')
|
||||
recognizer = create_recognizer('adaface')
|
||||
recognizer = create_recognizer('edgeface')
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
@@ -17,7 +17,7 @@ Face anti-spoofing detects whether a face is real (live) or fake (photo, video r
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.spoofing import MiniFASNet
|
||||
|
||||
detector = RetinaFace()
|
||||
@@ -69,20 +69,21 @@ spoofer = MiniFASNet(model_name=MiniFASNetWeights.V1SE)
|
||||
|
||||
## Confidence Thresholds
|
||||
|
||||
The default threshold is 0.5. Adjust for your use case:
|
||||
`result.is_real` is based on the model's top predicted class (argmax). If you want stricter behavior,
|
||||
apply your own confidence threshold:
|
||||
|
||||
```python
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
|
||||
# High security (fewer false accepts)
|
||||
HIGH_THRESHOLD = 0.7
|
||||
if result.confidence > HIGH_THRESHOLD:
|
||||
if result.is_real and result.confidence > HIGH_THRESHOLD:
|
||||
print("Real (high confidence)")
|
||||
else:
|
||||
print("Suspicious")
|
||||
|
||||
# Balanced
|
||||
if result.is_real: # Uses default 0.5 threshold
|
||||
# Balanced (argmax decision)
|
||||
if result.is_real:
|
||||
print("Real")
|
||||
else:
|
||||
print("Fake")
|
||||
@@ -127,7 +128,7 @@ cv2.imwrite("spoofing_result.jpg", image)
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.spoofing import MiniFASNet
|
||||
|
||||
detector = RetinaFace()
|
||||
@@ -252,7 +253,7 @@ python tools/spoofing.py --source 0
|
||||
## Factory Function
|
||||
|
||||
```python
|
||||
from uniface import create_spoofer
|
||||
from uniface.spoofing import create_spoofer
|
||||
|
||||
spoofer = create_spoofer() # Returns MiniFASNet
|
||||
```
|
||||
|
||||
172
docs/modules/stores.md
Normal file
@@ -0,0 +1,172 @@
|
||||
# Stores
|
||||
|
||||
FAISS-backed vector store for fast similarity search over embeddings.
|
||||
|
||||
!!! info "Optional dependency"
|
||||
```bash
|
||||
pip install faiss-cpu
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## FAISS
|
||||
|
||||
```python
|
||||
from uniface.stores import FAISS
|
||||
```
|
||||
|
||||
A thin wrapper around a FAISS `IndexFlatIP` (inner-product) index. Vectors
|
||||
**must** be L2-normalised before adding so that inner product equals cosine
|
||||
similarity. The store does not normalise internally.
|
||||
|
||||
Each vector is paired with a metadata `dict` that can carry any
|
||||
JSON-serialisable payload (person ID, name, source path, etc.).
|
||||
|
||||
### Constructor
|
||||
|
||||
```python
|
||||
store = FAISS(embedding_size=512, db_path="./vector_index")
|
||||
```
|
||||
|
||||
| Parameter | Type | Default | Description |
|
||||
|-----------|------|---------|-------------|
|
||||
| `embedding_size` | `int` | `512` | Dimension of embedding vectors |
|
||||
| `db_path` | `str` | `"./vector_index"` | Directory for persisting index and metadata |
|
||||
|
||||
---
|
||||
|
||||
### Methods
|
||||
|
||||
#### `add(embedding, metadata)`
|
||||
|
||||
Add a single embedding with associated metadata.
|
||||
|
||||
```python
|
||||
store.add(embedding, {"person_id": "alice", "source": "photo.jpg"})
|
||||
```
|
||||
|
||||
| Parameter | Type | Description |
|
||||
|-----------|------|-------------|
|
||||
| `embedding` | `np.ndarray` | L2-normalised embedding vector |
|
||||
| `metadata` | `dict[str, Any]` | Arbitrary JSON-serialisable key-value pairs |
|
||||
|
||||
---
|
||||
|
||||
#### `search(embedding, threshold=0.4)`
|
||||
|
||||
Find the closest match for a query embedding.
|
||||
|
||||
```python
|
||||
result, similarity = store.search(query_embedding, threshold=0.4)
|
||||
if result:
|
||||
print(result["person_id"], similarity)
|
||||
```
|
||||
|
||||
| Parameter | Type | Default | Description |
|
||||
|-----------|------|---------|-------------|
|
||||
| `embedding` | `np.ndarray` | — | L2-normalised query vector |
|
||||
| `threshold` | `float` | `0.4` | Minimum cosine similarity to accept a match |
|
||||
|
||||
**Returns:** `(metadata, similarity)` when a match meets the threshold, or `(None, similarity)` when the best similarity falls below the threshold or the index is empty.
|
||||
|
||||
---
|
||||
|
||||
#### `remove(key, value)`
|
||||
|
||||
Remove all entries where `metadata[key] == value` and rebuild the index.
|
||||
|
||||
```python
|
||||
removed = store.remove("person_id", "bob")
|
||||
print(f"Removed {removed} entries")
|
||||
```
|
||||
|
||||
| Parameter | Type | Description |
|
||||
|-----------|------|-------------|
|
||||
| `key` | `str` | Metadata key to match |
|
||||
| `value` | `Any` | Value to match |
|
||||
|
||||
**Returns:** Number of entries removed.
|
||||
|
||||
---
|
||||
|
||||
#### `save()`
|
||||
|
||||
Persist the FAISS index and metadata to disk.
|
||||
|
||||
```python
|
||||
store.save()
|
||||
```
|
||||
|
||||
Writes two files to `db_path`:
|
||||
|
||||
- `faiss_index.bin` — binary FAISS index
|
||||
- `metadata.json` — JSON array of metadata dicts
|
||||
|
||||
---
|
||||
|
||||
#### `load()`
|
||||
|
||||
Load a previously saved index and metadata.
|
||||
|
||||
```python
|
||||
store = FAISS(db_path="./vector_index")
|
||||
loaded = store.load() # True if files exist
|
||||
```
|
||||
|
||||
**Returns:** `True` if loaded successfully, `False` if files are missing.
|
||||
|
||||
**Raises:** `RuntimeError` if files exist but cannot be read.
|
||||
|
||||
---
|
||||
|
||||
### Properties
|
||||
|
||||
| Property | Type | Description |
|
||||
|----------|------|-------------|
|
||||
| `size` | `int` | Number of vectors in the index |
|
||||
| `len(store)` | `int` | Same as `size` |
|
||||
|
||||
---
|
||||
|
||||
## Example: End-to-End
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.stores import FAISS
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
# Build
|
||||
store = FAISS(db_path="./my_index")
|
||||
|
||||
image = cv2.imread("alice.jpg")
|
||||
faces = detector.detect(image)
|
||||
embedding = recognizer.get_normalized_embedding(image, faces[0].landmarks)
|
||||
store.add(embedding, {"person_id": "alice"})
|
||||
store.save()
|
||||
|
||||
# Search
|
||||
store2 = FAISS(db_path="./my_index")
|
||||
store2.load()
|
||||
|
||||
query = cv2.imread("unknown.jpg")
|
||||
faces = detector.detect(query)
|
||||
emb = recognizer.get_normalized_embedding(query, faces[0].landmarks)
|
||||
|
||||
result, sim = store2.search(emb)
|
||||
if result:
|
||||
print(f"Matched: {result['person_id']} (similarity: {sim:.3f})")
|
||||
else:
|
||||
print(f"No match (similarity: {sim:.3f})")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Face Search Recipe](../recipes/face-search.md) - Building and querying indexes
|
||||
- [Recognition Module](recognition.md) - Embedding extraction
|
||||
- [Thresholds Guide](../concepts/thresholds-calibration.md) - Tuning similarity thresholds
|
||||
263
docs/modules/tracking.md
Normal file
@@ -0,0 +1,263 @@
|
||||
# Tracking
|
||||
|
||||
Multi-object tracking using [BYTETracker](https://github.com/yakhyo/bytetrack-tracker) with Kalman filtering and IoU-based association. The tracker assigns persistent IDs to detected objects across video frames using a two-stage association strategy — first matching high-confidence detections, then low-confidence ones.
|
||||
|
||||
---
|
||||
|
||||
## How It Works
|
||||
|
||||
BYTETracker takes detection bounding boxes as input and returns tracked bounding boxes with persistent IDs. It does not depend on any specific detector — any source of `[x1, y1, x2, y2, score]` arrays will work.
|
||||
|
||||
Each frame, the tracker:
|
||||
|
||||
1. Splits detections into high-confidence and low-confidence groups
|
||||
2. Matches high-confidence detections to existing tracks using IoU
|
||||
3. Matches remaining tracks to low-confidence detections (second chance)
|
||||
4. Starts new tracks for unmatched high-confidence detections
|
||||
5. Removes tracks that have been lost for too long
|
||||
|
||||
The Kalman filter predicts where each track will be in the next frame, which helps maintain associations even when detections are noisy.
|
||||
|
||||
---
|
||||
|
||||
## Basic Usage
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface.common import xyxy_to_cxcywh
|
||||
from uniface.detection import SCRFD
|
||||
from uniface.tracking import BYTETracker
|
||||
from uniface.draw import draw_tracks
|
||||
|
||||
detector = SCRFD()
|
||||
tracker = BYTETracker(track_thresh=0.5, track_buffer=30)
|
||||
|
||||
cap = cv2.VideoCapture("video.mp4")
|
||||
|
||||
while cap.isOpened():
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
# 1. Detect faces
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# 2. Build detections array: [x1, y1, x2, y2, score]
|
||||
dets = np.array([[*f.bbox, f.confidence] for f in faces])
|
||||
dets = dets if len(dets) > 0 else np.empty((0, 5))
|
||||
|
||||
# 3. Update tracker
|
||||
tracks = tracker.update(dets)
|
||||
|
||||
# 4. Map track IDs back to face objects
|
||||
if len(tracks) > 0 and len(faces) > 0:
|
||||
face_bboxes = np.array([f.bbox for f in faces], dtype=np.float32)
|
||||
track_ids = tracks[:, 4].astype(int)
|
||||
|
||||
face_centers = xyxy_to_cxcywh(face_bboxes)[:, :2]
|
||||
track_centers = xyxy_to_cxcywh(tracks[:, :4])[:, :2]
|
||||
|
||||
for ti in range(len(tracks)):
|
||||
dists = (track_centers[ti, 0] - face_centers[:, 0]) ** 2 + (track_centers[ti, 1] - face_centers[:, 1]) ** 2
|
||||
faces[int(np.argmin(dists))].track_id = track_ids[ti]
|
||||
|
||||
# 5. Draw
|
||||
tracked_faces = [f for f in faces if f.track_id is not None]
|
||||
draw_tracks(image=frame, faces=tracked_faces)
|
||||
cv2.imshow("Tracking", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
Each track ID gets a deterministic color via golden-ratio hue stepping, so the same person keeps the same color across the entire video.
|
||||
|
||||
---
|
||||
|
||||
## Webcam Tracking
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface.common import xyxy_to_cxcywh
|
||||
from uniface.detection import SCRFD
|
||||
from uniface.tracking import BYTETracker
|
||||
from uniface.draw import draw_tracks
|
||||
|
||||
detector = SCRFD()
|
||||
tracker = BYTETracker(track_thresh=0.5, track_buffer=30)
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
dets = np.array([[*f.bbox, f.confidence] for f in faces])
|
||||
dets = dets if len(dets) > 0 else np.empty((0, 5))
|
||||
|
||||
tracks = tracker.update(dets)
|
||||
|
||||
if len(tracks) > 0 and len(faces) > 0:
|
||||
face_bboxes = np.array([f.bbox for f in faces], dtype=np.float32)
|
||||
track_ids = tracks[:, 4].astype(int)
|
||||
|
||||
face_centers = xyxy_to_cxcywh(face_bboxes)[:, :2]
|
||||
track_centers = xyxy_to_cxcywh(tracks[:, :4])[:, :2]
|
||||
|
||||
for ti in range(len(tracks)):
|
||||
dists = (track_centers[ti, 0] - face_centers[:, 0]) ** 2 + (track_centers[ti, 1] - face_centers[:, 1]) ** 2
|
||||
faces[int(np.argmin(dists))].track_id = track_ids[ti]
|
||||
|
||||
draw_tracks(image=frame, faces=[f for f in faces if f.track_id is not None])
|
||||
cv2.imshow("Face Tracking - Press 'q' to quit", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Parameters
|
||||
|
||||
```python
|
||||
from uniface.tracking import BYTETracker
|
||||
|
||||
tracker = BYTETracker(
|
||||
track_thresh=0.5,
|
||||
track_buffer=30,
|
||||
match_thresh=0.8,
|
||||
low_thresh=0.1,
|
||||
)
|
||||
```
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `track_thresh` | 0.5 | Detections above this score go through first-pass association |
|
||||
| `track_buffer` | 30 | How many frames to keep a lost track before removing it |
|
||||
| `match_thresh` | 0.8 | IoU threshold for matching tracks to detections |
|
||||
| `low_thresh` | 0.1 | Detections below this score are discarded entirely |
|
||||
|
||||
---
|
||||
|
||||
## Input / Output
|
||||
|
||||
**Input** — `(N, 5)` numpy array with `[x1, y1, x2, y2, confidence]` per detection:
|
||||
|
||||
```python
|
||||
detections = np.array([
|
||||
[100, 50, 200, 160, 0.95],
|
||||
[300, 80, 380, 200, 0.87],
|
||||
])
|
||||
```
|
||||
|
||||
**Output** — `(M, 5)` numpy array with `[x1, y1, x2, y2, track_id]` per active track:
|
||||
|
||||
```python
|
||||
tracks = tracker.update(detections)
|
||||
# array([[101.2, 51.3, 199.8, 159.8, 1.],
|
||||
# [300.5, 80.2, 379.7, 200.1, 2.]])
|
||||
```
|
||||
|
||||
The output bounding boxes come from the Kalman filter prediction, so they may differ slightly from the input. Track IDs are integers that persist across frames for the same object.
|
||||
|
||||
---
|
||||
|
||||
## Resetting the Tracker
|
||||
|
||||
When switching to a different video or scene, reset the tracker to clear all internal state:
|
||||
|
||||
```python
|
||||
tracker.reset()
|
||||
```
|
||||
|
||||
This clears all active, lost, and removed tracks, resets the frame counter, and resets the ID counter back to zero.
|
||||
|
||||
---
|
||||
|
||||
## Visualization
|
||||
|
||||
`draw_tracks` draws bounding boxes color-coded by track ID:
|
||||
|
||||
```python
|
||||
from uniface.draw import draw_tracks
|
||||
|
||||
draw_tracks(
|
||||
image=frame,
|
||||
faces=tracked_faces,
|
||||
draw_landmarks=True,
|
||||
draw_id=True,
|
||||
corner_bbox=True,
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Small Face Performance
|
||||
|
||||
!!! warning "Tracking performance with small faces"
|
||||
The tracker relies on IoU (Intersection over Union) to match detections across
|
||||
frames. When faces occupy a small portion of the image — for example in
|
||||
surveillance footage or wide-angle cameras — even slight movement between frames
|
||||
can cause a large drop in IoU. This makes it harder for the tracker to maintain
|
||||
consistent IDs, and you may see IDs switching or resetting more often than expected.
|
||||
|
||||
This is not specific to BYTETracker; it applies to any IoU-based tracker. A few
|
||||
things that can help:
|
||||
|
||||
- **Lower `match_thresh`** (e.g. `0.5` or `0.6`) so the tracker accepts lower
|
||||
overlap as a valid match.
|
||||
- **Increase `track_buffer`** (e.g. `60` or higher) to hold onto lost tracks
|
||||
longer before discarding them.
|
||||
- **Use a higher-resolution input** if possible, so face bounding boxes are
|
||||
larger in pixel terms.
|
||||
|
||||
```python
|
||||
tracker = BYTETracker(
|
||||
track_thresh=0.4,
|
||||
track_buffer=60,
|
||||
match_thresh=0.6,
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## CLI Tool
|
||||
|
||||
```bash
|
||||
# Track faces in a video
|
||||
python tools/track.py --source video.mp4
|
||||
|
||||
# Webcam
|
||||
python tools/track.py --source 0
|
||||
|
||||
# Save output
|
||||
python tools/track.py --source video.mp4 --output tracked.mp4
|
||||
|
||||
# Use RetinaFace instead of SCRFD
|
||||
python tools/track.py --source video.mp4 --detector retinaface
|
||||
|
||||
# Keep lost tracks longer
|
||||
python tools/track.py --source video.mp4 --track-buffer 60
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
- [yakhyo/bytetrack-tracker](https://github.com/yakhyo/bytetrack-tracker) — standalone BYTETracker implementation used in UniFace
|
||||
- [ByteTrack paper](https://arxiv.org/abs/2110.06864) — Zhang et al., "ByteTrack: Multi-Object Tracking by Associating Every Detection Box"
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Detection](detection.md) — face detection models
|
||||
- [Video & Webcam](../recipes/video-webcam.md) — video processing patterns
|
||||
- [Inputs & Outputs](../concepts/inputs-outputs.md) — data types and formats
|
||||
@@ -12,10 +12,15 @@ Run UniFace examples directly in your browser with Google Colab, or download and
|
||||
| [Face Alignment](https://github.com/yakhyo/uniface/blob/main/examples/02_face_alignment.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/02_face_alignment.ipynb) | Align faces for recognition |
|
||||
| [Face Verification](https://github.com/yakhyo/uniface/blob/main/examples/03_face_verification.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/03_face_verification.ipynb) | Compare faces for identity |
|
||||
| [Face Search](https://github.com/yakhyo/uniface/blob/main/examples/04_face_search.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/04_face_search.ipynb) | Find a person in group photos |
|
||||
| [Face Analyzer](https://github.com/yakhyo/uniface/blob/main/examples/05_face_analyzer.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/05_face_analyzer.ipynb) | All-in-one face analysis |
|
||||
| [Face Analyzer](https://github.com/yakhyo/uniface/blob/main/examples/05_face_analyzer.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/05_face_analyzer.ipynb) | Unified face analysis |
|
||||
| [Face Parsing](https://github.com/yakhyo/uniface/blob/main/examples/06_face_parsing.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/06_face_parsing.ipynb) | Semantic face segmentation |
|
||||
| [Face Anonymization](https://github.com/yakhyo/uniface/blob/main/examples/07_face_anonymization.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/07_face_anonymization.ipynb) | Privacy-preserving blur |
|
||||
| [Gaze Estimation](https://github.com/yakhyo/uniface/blob/main/examples/08_gaze_estimation.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/08_gaze_estimation.ipynb) | Gaze direction estimation |
|
||||
| [Face Segmentation](https://github.com/yakhyo/uniface/blob/main/examples/09_face_segmentation.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/09_face_segmentation.ipynb) | Face segmentation with XSeg |
|
||||
| [Face Vector Store](https://github.com/yakhyo/uniface/blob/main/examples/10_face_vector_store.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/10_face_vector_store.ipynb) | FAISS-backed face database |
|
||||
| [Head Pose Estimation](https://github.com/yakhyo/uniface/blob/main/examples/11_head_pose_estimation.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/11_head_pose_estimation.ipynb) | 3D head orientation estimation |
|
||||
| [Face Recognition](https://github.com/yakhyo/uniface/blob/main/examples/12_face_recognition.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/12_face_recognition.ipynb) | Standalone face recognition pipeline |
|
||||
| [Portrait Matting](https://github.com/yakhyo/uniface/blob/main/examples/13_portrait_matting.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/13_portrait_matting.ipynb) | Portrait matting with MODNet |
|
||||
|
||||
---
|
||||
|
||||
@@ -29,7 +34,7 @@ git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
|
||||
# Install dependencies
|
||||
pip install uniface jupyter
|
||||
pip install "uniface[cpu]" jupyter # or uniface[gpu] for CUDA
|
||||
|
||||
# Launch Jupyter
|
||||
jupyter notebook examples/
|
||||
|
||||
7
docs/overrides/main.html
Normal file
@@ -0,0 +1,7 @@
|
||||
{% extends "base.html" %}
|
||||
|
||||
{% block announce %}
|
||||
<a href="https://github.com/yakhyo/uniface" target="_blank" rel="noopener">
|
||||
Support our work — give UniFace a <span class="twemoji">{% include ".icons/octicons/star-fill-16.svg" %}</span> on <strong>GitHub</strong> and help us reach more developers!
|
||||
</a>
|
||||
{% endblock %}
|
||||
@@ -10,7 +10,7 @@ Detect faces in an image:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
# Load image
|
||||
image = cv2.imread("photo.jpg")
|
||||
@@ -46,27 +46,16 @@ Draw bounding boxes and landmarks:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.draw import draw_detections
|
||||
|
||||
# Detect faces
|
||||
detector = RetinaFace()
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Extract visualization data
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
|
||||
# Draw on image
|
||||
draw_detections(
|
||||
image=image,
|
||||
bboxes=bboxes,
|
||||
scores=scores,
|
||||
landmarks=landmarks,
|
||||
vis_threshold=0.6,
|
||||
)
|
||||
draw_detections(image=image, faces=faces, vis_threshold=0.6)
|
||||
|
||||
# Save result
|
||||
cv2.imwrite("output.jpg", image)
|
||||
@@ -80,8 +69,8 @@ Compare two faces:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, ArcFace
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
@@ -96,12 +85,13 @@ faces1 = detector.detect(image1)
|
||||
faces2 = detector.detect(image2)
|
||||
|
||||
if faces1 and faces2:
|
||||
# Extract embeddings
|
||||
# Extract embeddings (normalized 1-D vectors)
|
||||
emb1 = recognizer.get_normalized_embedding(image1, faces1[0].landmarks)
|
||||
emb2 = recognizer.get_normalized_embedding(image2, faces2[0].landmarks)
|
||||
|
||||
# Compute similarity (cosine similarity)
|
||||
similarity = np.dot(emb1, emb2.T)[0][0]
|
||||
# Compute cosine similarity
|
||||
from uniface import compute_similarity
|
||||
similarity = compute_similarity(emb1, emb2, normalized=True)
|
||||
|
||||
# Interpret result
|
||||
if similarity > 0.6:
|
||||
@@ -121,7 +111,8 @@ if faces1 and faces2:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace, AgeGender
|
||||
from uniface.attribute import AgeGender
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
@@ -133,7 +124,7 @@ faces = detector.detect(image)
|
||||
|
||||
# Predict attributes
|
||||
for i, face in enumerate(faces):
|
||||
result = age_gender.predict(image, face.bbox)
|
||||
result = age_gender.predict(image, face)
|
||||
print(f"Face {i+1}: {result.sex}, {result.age} years old")
|
||||
```
|
||||
|
||||
@@ -152,7 +143,8 @@ Detect race, gender, and age group:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace, FairFace
|
||||
from uniface.attribute import FairFace
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
fairface = FairFace()
|
||||
@@ -161,7 +153,7 @@ image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
result = fairface.predict(image, face.bbox)
|
||||
result = fairface.predict(image, face)
|
||||
print(f"Face {i+1}: {result.sex}, {result.age_group}, {result.race}")
|
||||
```
|
||||
|
||||
@@ -174,21 +166,24 @@ Face 2: Female, 20-29, White
|
||||
|
||||
---
|
||||
|
||||
## Facial Landmarks (106 Points)
|
||||
## Facial Landmarks (106 / 98 / 68 Points)
|
||||
|
||||
UniFace ships two dense-landmark families. Pick whichever fits your downstream task:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace, Landmark106
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
detector = RetinaFace()
|
||||
landmarker = Landmark106()
|
||||
landmarker = Landmark106() # 106-point InsightFace 2d106det model
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
landmarks = landmarker.get_landmarks(image, faces[0].bbox)
|
||||
print(f"Detected {len(landmarks)} landmarks")
|
||||
print(f"Detected {len(landmarks)} landmarks") # 106
|
||||
|
||||
# Draw landmarks
|
||||
for x, y in landmarks.astype(int):
|
||||
@@ -197,6 +192,21 @@ if faces:
|
||||
cv2.imwrite("landmarks.jpg", image)
|
||||
```
|
||||
|
||||
**PIPNet (98 / 68 points)** — ResNet-18 backbone trained on WFLW (98 pts) or 300W+CelebA (68 pts):
|
||||
|
||||
```python
|
||||
from uniface.constants import PIPNetWeights
|
||||
from uniface.landmark import PIPNet
|
||||
|
||||
# 98-point WFLW model (default)
|
||||
landmarker_98 = PIPNet()
|
||||
|
||||
# 68-point 300W+CelebA model
|
||||
landmarker_68 = PIPNet(model_name=PIPNetWeights.DW300_CELEBA_68)
|
||||
|
||||
landmarks = landmarker_98.get_landmarks(image, faces[0].bbox) # (98, 2)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Gaze Estimation
|
||||
@@ -204,8 +214,9 @@ if faces:
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, MobileGaze
|
||||
from uniface.visualization import draw_gaze
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.gaze import MobileGaze
|
||||
from uniface.draw import draw_gaze
|
||||
|
||||
detector = RetinaFace()
|
||||
gaze_estimator = MobileGaze()
|
||||
@@ -229,6 +240,36 @@ cv2.imwrite("gaze_output.jpg", image)
|
||||
|
||||
---
|
||||
|
||||
## Head Pose Estimation
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.headpose import HeadPose
|
||||
from uniface.draw import draw_head_pose
|
||||
|
||||
detector = RetinaFace()
|
||||
head_pose = HeadPose()
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
x1, y1, x2, y2 = map(int, face.bbox[:4])
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size > 0:
|
||||
result = head_pose.estimate(face_crop)
|
||||
print(f"Face {i+1}: pitch={result.pitch:.1f}°, yaw={result.yaw:.1f}°, roll={result.roll:.1f}°")
|
||||
|
||||
# Draw 3D cube visualization
|
||||
draw_head_pose(image, face.bbox, result.pitch, result.yaw, result.roll)
|
||||
|
||||
cv2.imwrite("headpose_output.jpg", image)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Face Parsing
|
||||
|
||||
Segment face into semantic components:
|
||||
@@ -237,7 +278,7 @@ Segment face into semantic components:
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.visualization import vis_parsing_maps
|
||||
from uniface.draw import vis_parsing_maps
|
||||
|
||||
parser = BiSeNet()
|
||||
|
||||
@@ -256,31 +297,57 @@ print(f"Detected {len(np.unique(mask))} facial components")
|
||||
|
||||
---
|
||||
|
||||
## Portrait Matting
|
||||
|
||||
Remove backgrounds without a trimap:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface.matting import MODNet
|
||||
|
||||
matting = MODNet()
|
||||
|
||||
image = cv2.imread("portrait.jpg")
|
||||
matte = matting.predict(image) # (H, W) float32 in [0, 1]
|
||||
|
||||
# Transparent PNG
|
||||
rgba = cv2.cvtColor(image, cv2.COLOR_BGR2BGRA)
|
||||
rgba[:, :, 3] = (matte * 255).astype(np.uint8)
|
||||
cv2.imwrite("transparent.png", rgba)
|
||||
|
||||
# Green screen
|
||||
matte_3ch = matte[:, :, np.newaxis]
|
||||
bg = np.full_like(image, (0, 177, 64), dtype=np.uint8)
|
||||
result = (image * matte_3ch + bg * (1 - matte_3ch)).astype(np.uint8)
|
||||
cv2.imwrite("green_screen.jpg", result)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Face Anonymization
|
||||
|
||||
Blur faces for privacy protection:
|
||||
|
||||
```python
|
||||
from uniface.privacy import anonymize_faces
|
||||
import cv2
|
||||
|
||||
# One-liner: automatic detection and blurring
|
||||
image = cv2.imread("group_photo.jpg")
|
||||
anonymized = anonymize_faces(image, method='pixelate')
|
||||
cv2.imwrite("anonymized.jpg", anonymized)
|
||||
```
|
||||
|
||||
**Manual control:**
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
detector = RetinaFace()
|
||||
blurrer = BlurFace(method='gaussian', blur_strength=5.0)
|
||||
blurrer = BlurFace(method='pixelate')
|
||||
|
||||
image = cv2.imread("group_photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
anonymized = blurrer.anonymize(image, faces)
|
||||
cv2.imwrite("anonymized.jpg", anonymized)
|
||||
```
|
||||
|
||||
**Custom blur settings:**
|
||||
|
||||
```python
|
||||
blurrer = BlurFace(method='gaussian', blur_strength=5.0)
|
||||
anonymized = blurrer.anonymize(image, faces)
|
||||
```
|
||||
|
||||
**Available methods:**
|
||||
@@ -301,7 +368,7 @@ Detect real vs. fake faces:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.spoofing import MiniFASNet
|
||||
|
||||
detector = RetinaFace()
|
||||
@@ -324,8 +391,8 @@ Real-time face detection:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.draw import draw_detections
|
||||
|
||||
detector = RetinaFace()
|
||||
cap = cv2.VideoCapture(0)
|
||||
@@ -339,10 +406,7 @@ while True:
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks)
|
||||
draw_detections(image=frame, faces=faces)
|
||||
|
||||
cv2.imshow("UniFace - Press 'q' to quit", frame)
|
||||
|
||||
@@ -355,6 +419,60 @@ cv2.destroyAllWindows()
|
||||
|
||||
---
|
||||
|
||||
## Face Tracking
|
||||
|
||||
Track faces across video frames with persistent IDs:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface.common import xyxy_to_cxcywh
|
||||
from uniface.detection import SCRFD
|
||||
from uniface.tracking import BYTETracker
|
||||
from uniface.draw import draw_tracks
|
||||
|
||||
detector = SCRFD()
|
||||
tracker = BYTETracker(track_thresh=0.5, track_buffer=30)
|
||||
|
||||
cap = cv2.VideoCapture("video.mp4")
|
||||
|
||||
while cap.isOpened():
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
dets = np.array([[*f.bbox, f.confidence] for f in faces])
|
||||
dets = dets if len(dets) > 0 else np.empty((0, 5))
|
||||
|
||||
tracks = tracker.update(dets)
|
||||
|
||||
# Assign track IDs to faces
|
||||
if len(tracks) > 0 and len(faces) > 0:
|
||||
face_bboxes = np.array([f.bbox for f in faces], dtype=np.float32)
|
||||
track_ids = tracks[:, 4].astype(int)
|
||||
|
||||
face_centers = xyxy_to_cxcywh(face_bboxes)[:, :2]
|
||||
track_centers = xyxy_to_cxcywh(tracks[:, :4])[:, :2]
|
||||
|
||||
for ti in range(len(tracks)):
|
||||
dists = (track_centers[ti, 0] - face_centers[:, 0]) ** 2 + (track_centers[ti, 1] - face_centers[:, 1]) ** 2
|
||||
faces[int(np.argmin(dists))].track_id = track_ids[ti]
|
||||
|
||||
tracked_faces = [f for f in faces if f.track_id is not None]
|
||||
draw_tracks(image=frame, faces=tracked_faces)
|
||||
cv2.imshow("Tracking", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
For more details, see the [Tracking module](modules/tracking.md).
|
||||
|
||||
---
|
||||
|
||||
## Model Selection
|
||||
|
||||
For detailed model comparisons and benchmarks, see the [Model Zoo](models.md).
|
||||
@@ -364,8 +482,11 @@ For detailed model comparisons and benchmarks, see the [Model Zoo](models.md).
|
||||
| Task | Available Models |
|
||||
|------|------------------|
|
||||
| Detection | `RetinaFace`, `SCRFD`, `YOLOv5Face`, `YOLOv8Face` |
|
||||
| Recognition | `ArcFace`, `AdaFace`, `MobileFace`, `SphereFace` |
|
||||
| Recognition | `ArcFace`, `AdaFace`, `EdgeFace`, `MobileFace`, `SphereFace` |
|
||||
| Landmarks | `Landmark106` (106 pts), `PIPNet` (98 / 68 pts) |
|
||||
| Tracking | `BYTETracker` |
|
||||
| Gaze | `MobileGaze` (ResNet18/34/50, MobileNetV2, MobileOneS0) |
|
||||
| Head Pose | `HeadPose` (ResNet18/34/50, MobileNetV2/V3) |
|
||||
| Parsing | `BiSeNet` (ResNet18/34) |
|
||||
| Attributes | `AgeGender`, `FairFace`, `Emotion` |
|
||||
| Anti-Spoofing | `MiniFASNet` (V1SE, V2) |
|
||||
@@ -407,13 +528,19 @@ python -c "import platform; print(platform.machine())"
|
||||
### Import Errors
|
||||
|
||||
```python
|
||||
# Correct imports
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
# Also works (re-exported at package level)
|
||||
from uniface import RetinaFace, ArcFace, Landmark106
|
||||
from uniface.detection import RetinaFace, SCRFD
|
||||
from uniface.recognition import ArcFace, AdaFace
|
||||
from uniface.attribute import AgeGender, FairFace
|
||||
from uniface.landmark import Landmark106, PIPNet
|
||||
from uniface.gaze import MobileGaze
|
||||
from uniface.headpose import HeadPose
|
||||
from uniface.parsing import BiSeNet, XSeg
|
||||
from uniface.privacy import BlurFace
|
||||
from uniface.spoofing import MiniFASNet
|
||||
from uniface.tracking import BYTETracker
|
||||
from uniface.analyzer import FaceAnalyzer
|
||||
from uniface.stores import FAISS # pip install faiss-cpu
|
||||
from uniface.draw import draw_detections, draw_tracks
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
@@ -11,7 +11,7 @@ Blur faces in real-time video streams for privacy protection.
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
detector = RetinaFace()
|
||||
@@ -40,7 +40,7 @@ cv2.destroyAllWindows()
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
detector = RetinaFace()
|
||||
@@ -67,14 +67,19 @@ out.release()
|
||||
|
||||
---
|
||||
|
||||
## One-Liner for Images
|
||||
## Single Image
|
||||
|
||||
```python
|
||||
from uniface.privacy import anonymize_faces
|
||||
import cv2
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
detector = RetinaFace()
|
||||
blurrer = BlurFace(method='pixelate')
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
result = anonymize_faces(image, method='pixelate')
|
||||
faces = detector.detect(image)
|
||||
result = blurrer.anonymize(image, faces)
|
||||
cv2.imwrite("anonymized.jpg", result)
|
||||
```
|
||||
|
||||
@@ -84,7 +89,7 @@ cv2.imwrite("anonymized.jpg", result)
|
||||
|
||||
| Method | Usage |
|
||||
|--------|-------|
|
||||
| Pixelate | `BlurFace(method='pixelate', pixel_blocks=10)` |
|
||||
| Pixelate | `BlurFace(method='pixelate', pixel_blocks=15)` |
|
||||
| Gaussian | `BlurFace(method='gaussian', blur_strength=3.0)` |
|
||||
| Blackout | `BlurFace(method='blackout', color=(0,0,0))` |
|
||||
| Elliptical | `BlurFace(method='elliptical', margin=20)` |
|
||||
|
||||
@@ -12,7 +12,7 @@ Process multiple images efficiently.
|
||||
```python
|
||||
import cv2
|
||||
from pathlib import Path
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
|
||||
@@ -54,7 +54,8 @@ for image_path in tqdm(image_files, desc="Processing"):
|
||||
## Extract Embeddings
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, ArcFace
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
import numpy as np
|
||||
|
||||
detector = RetinaFace()
|
||||
|
||||
@@ -27,29 +27,29 @@ import numpy as np
|
||||
|
||||
class MyDetector(BaseDetector):
|
||||
def __init__(self, model_path: str, confidence_threshold: float = 0.5):
|
||||
super().__init__(confidence_threshold=confidence_threshold)
|
||||
self.session = create_onnx_session(model_path)
|
||||
self.threshold = confidence_threshold
|
||||
|
||||
def preprocess(self, image: np.ndarray) -> np.ndarray:
|
||||
# Your preprocessing logic
|
||||
# e.g., resize, normalize, transpose
|
||||
raise NotImplementedError
|
||||
|
||||
def postprocess(self, outputs, shape) -> list[Face]:
|
||||
# Your postprocessing logic
|
||||
# e.g., decode boxes, apply NMS, create Face objects
|
||||
raise NotImplementedError
|
||||
|
||||
def detect(self, image: np.ndarray) -> list[Face]:
|
||||
# 1. Preprocess image
|
||||
input_tensor = self._preprocess(image)
|
||||
input_tensor = self.preprocess(image)
|
||||
|
||||
# 2. Run inference
|
||||
outputs = self.session.run(None, {'input': input_tensor})
|
||||
|
||||
# 3. Postprocess outputs to Face objects
|
||||
faces = self._postprocess(outputs, image.shape)
|
||||
return faces
|
||||
|
||||
def _preprocess(self, image):
|
||||
# Your preprocessing logic
|
||||
# e.g., resize, normalize, transpose
|
||||
pass
|
||||
|
||||
def _postprocess(self, outputs, shape):
|
||||
# Your postprocessing logic
|
||||
# e.g., decode boxes, apply NMS, create Face objects
|
||||
pass
|
||||
return self.postprocess(outputs, image.shape)
|
||||
```
|
||||
|
||||
---
|
||||
@@ -57,36 +57,14 @@ class MyDetector(BaseDetector):
|
||||
## Add Custom Recognition Model
|
||||
|
||||
```python
|
||||
from uniface.recognition.base import BaseRecognizer
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
from uniface import face_alignment
|
||||
import numpy as np
|
||||
from uniface.recognition.base import BaseRecognizer, PreprocessConfig
|
||||
|
||||
class MyRecognizer(BaseRecognizer):
|
||||
def __init__(self, model_path: str):
|
||||
self.session = create_onnx_session(model_path)
|
||||
def __init__(self, model_path: str, providers=None):
|
||||
preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
|
||||
super().__init__(model_path, preprocessing, providers=providers)
|
||||
|
||||
def get_normalized_embedding(
|
||||
self,
|
||||
image: np.ndarray,
|
||||
landmarks: np.ndarray
|
||||
) -> np.ndarray:
|
||||
# 1. Align face
|
||||
aligned = face_alignment(image, landmarks)
|
||||
|
||||
# 2. Preprocess
|
||||
input_tensor = self._preprocess(aligned)
|
||||
|
||||
# 3. Run inference
|
||||
embedding = self.session.run(None, {'input': input_tensor})[0]
|
||||
|
||||
# 4. Normalize
|
||||
embedding = embedding / np.linalg.norm(embedding)
|
||||
return embedding
|
||||
|
||||
def _preprocess(self, image):
|
||||
# Your preprocessing logic
|
||||
pass
|
||||
# Optional: override preprocess() if your model expects custom normalization.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
@@ -1,178 +1,166 @@
|
||||
# Face Search
|
||||
|
||||
Build a face search system for finding people in images.
|
||||
Find and identify people in images and video streams.
|
||||
|
||||
!!! note "Work in Progress"
|
||||
This page contains example code patterns. Test thoroughly before using in production.
|
||||
UniFace supports two search approaches:
|
||||
|
||||
| Approach | Use case | Tool |
|
||||
| -------------------- | ------------------------------------------------ | ----------------------- |
|
||||
| **Reference search** | "Is this specific person in the video?" | `tools/search.py` |
|
||||
| **Vector search** | "Who is this?" against a database of known faces | `tools/faiss_search.py` |
|
||||
|
||||
---
|
||||
|
||||
## Basic Face Database
|
||||
## Reference Search (single image)
|
||||
|
||||
Compare every detected face against a single reference photo:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.face_utils import compute_similarity
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
ref_image = cv2.imread("reference.jpg")
|
||||
ref_faces = detector.detect(ref_image)
|
||||
ref_embedding = recognizer.get_normalized_embedding(ref_image, ref_faces[0].landmarks)
|
||||
|
||||
query_image = cv2.imread("group_photo.jpg")
|
||||
faces = detector.detect(query_image)
|
||||
|
||||
for face in faces:
|
||||
embedding = recognizer.get_normalized_embedding(query_image, face.landmarks)
|
||||
sim = compute_similarity(ref_embedding, embedding)
|
||||
|
||||
label = f"Match ({sim:.2f})" if sim > 0.4 else f"Unknown ({sim:.2f})"
|
||||
print(label)
|
||||
```
|
||||
|
||||
**CLI tool:**
|
||||
|
||||
```bash
|
||||
python tools/search.py --reference ref.jpg --source video.mp4
|
||||
python tools/search.py --reference ref.jpg --source 0 # webcam
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Vector Search (FAISS index)
|
||||
|
||||
For identifying faces against a database of many known people, use the
|
||||
[`FAISS`](../modules/stores.md) vector store.
|
||||
|
||||
!!! info "Install extra"
|
||||
`bash
|
||||
pip install faiss-cpu
|
||||
`
|
||||
|
||||
### Build an index
|
||||
|
||||
Organise face images in person sub-folders:
|
||||
|
||||
```
|
||||
dataset/
|
||||
├── alice/
|
||||
│ ├── 001.jpg
|
||||
│ └── 002.jpg
|
||||
├── bob/
|
||||
│ └── 001.jpg
|
||||
└── charlie/
|
||||
├── 001.jpg
|
||||
└── 002.jpg
|
||||
```
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from pathlib import Path
|
||||
from uniface import RetinaFace, ArcFace
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.stores import FAISS
|
||||
|
||||
class FaceDatabase:
|
||||
def __init__(self):
|
||||
self.detector = RetinaFace()
|
||||
self.recognizer = ArcFace()
|
||||
self.embeddings = {}
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
store = FAISS(db_path="./my_index")
|
||||
|
||||
def add_face(self, person_id, image):
|
||||
"""Add a face to the database."""
|
||||
faces = self.detector.detect(image)
|
||||
if not faces:
|
||||
raise ValueError(f"No face found for {person_id}")
|
||||
for person_dir in sorted(Path("dataset").iterdir()):
|
||||
if not person_dir.is_dir():
|
||||
continue
|
||||
for img_path in person_dir.glob("*.jpg"):
|
||||
image = cv2.imread(str(img_path))
|
||||
faces = detector.detect(image)
|
||||
if faces:
|
||||
emb = recognizer.get_normalized_embedding(image, faces[0].landmarks)
|
||||
store.add(emb, {"person_id": person_dir.name, "source": str(img_path)})
|
||||
|
||||
face = max(faces, key=lambda f: f.confidence)
|
||||
embedding = self.recognizer.get_normalized_embedding(image, face.landmarks)
|
||||
self.embeddings[person_id] = embedding
|
||||
return True
|
||||
|
||||
def search(self, image, threshold=0.6):
|
||||
"""Search for faces in an image."""
|
||||
faces = self.detector.detect(image)
|
||||
results = []
|
||||
|
||||
for face in faces:
|
||||
embedding = self.recognizer.get_normalized_embedding(image, face.landmarks)
|
||||
|
||||
best_match = None
|
||||
best_similarity = -1
|
||||
|
||||
for person_id, db_embedding in self.embeddings.items():
|
||||
similarity = np.dot(embedding, db_embedding.T)[0][0]
|
||||
if similarity > best_similarity:
|
||||
best_similarity = similarity
|
||||
best_match = person_id
|
||||
|
||||
results.append({
|
||||
'bbox': face.bbox,
|
||||
'match': best_match if best_similarity >= threshold else None,
|
||||
'similarity': best_similarity
|
||||
})
|
||||
|
||||
return results
|
||||
|
||||
def save(self, path):
|
||||
"""Save database to file."""
|
||||
np.savez(path, embeddings=dict(self.embeddings))
|
||||
|
||||
def load(self, path):
|
||||
"""Load database from file."""
|
||||
data = np.load(path, allow_pickle=True)
|
||||
self.embeddings = data['embeddings'].item()
|
||||
|
||||
# Usage
|
||||
db = FaceDatabase()
|
||||
|
||||
# Add faces
|
||||
for image_path in Path("known_faces/").glob("*.jpg"):
|
||||
person_id = image_path.stem
|
||||
image = cv2.imread(str(image_path))
|
||||
try:
|
||||
db.add_face(person_id, image)
|
||||
print(f"Added: {person_id}")
|
||||
except ValueError as e:
|
||||
print(f"Skipped: {e}")
|
||||
|
||||
# Save database
|
||||
db.save("face_database.npz")
|
||||
|
||||
# Search
|
||||
query_image = cv2.imread("group_photo.jpg")
|
||||
results = db.search(query_image)
|
||||
|
||||
for r in results:
|
||||
if r['match']:
|
||||
print(f"Found: {r['match']} (similarity: {r['similarity']:.3f})")
|
||||
store.save()
|
||||
print(f"Index saved: {store}")
|
||||
```
|
||||
|
||||
---
|
||||
**CLI tool:**
|
||||
|
||||
## Visualization
|
||||
```bash
|
||||
python tools/faiss_search.py build --faces-dir dataset/ --db-path ./my_index
|
||||
```
|
||||
|
||||
### Search against the index
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.stores import FAISS
|
||||
|
||||
def visualize_search_results(image, results):
|
||||
"""Draw search results on image."""
|
||||
for r in results:
|
||||
x1, y1, x2, y2 = map(int, r['bbox'])
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
if r['match']:
|
||||
color = (0, 255, 0) # Green for match
|
||||
label = f"{r['match']} ({r['similarity']:.2f})"
|
||||
else:
|
||||
color = (0, 0, 255) # Red for unknown
|
||||
label = f"Unknown ({r['similarity']:.2f})"
|
||||
store = FAISS(db_path="./my_index")
|
||||
store.load()
|
||||
|
||||
cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
|
||||
cv2.putText(image, label, (x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
|
||||
image = cv2.imread("query.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
return image
|
||||
for face in faces:
|
||||
embedding = recognizer.get_normalized_embedding(image, face.landmarks)
|
||||
result, similarity = store.search(embedding, threshold=0.4)
|
||||
|
||||
# Usage
|
||||
results = db.search(image)
|
||||
annotated = visualize_search_results(image.copy(), results)
|
||||
cv2.imwrite("search_result.jpg", annotated)
|
||||
if result:
|
||||
print(f"Matched: {result['person_id']} ({similarity:.2f})")
|
||||
else:
|
||||
print(f"Unknown ({similarity:.2f})")
|
||||
```
|
||||
|
||||
---
|
||||
**CLI tool:**
|
||||
|
||||
## Real-Time Search
|
||||
```bash
|
||||
python tools/faiss_search.py run --db-path ./my_index --source video.mp4
|
||||
python tools/faiss_search.py run --db-path ./my_index --source 0 # webcam
|
||||
```
|
||||
|
||||
### Manage the index
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.stores import FAISS
|
||||
|
||||
def realtime_search(db):
|
||||
"""Real-time face search from webcam."""
|
||||
cap = cv2.VideoCapture(0)
|
||||
store = FAISS(db_path="./my_index")
|
||||
store.load()
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
print(f"Total vectors: {len(store)}")
|
||||
|
||||
results = db.search(frame, threshold=0.5)
|
||||
removed = store.remove("person_id", "bob")
|
||||
print(f"Removed {removed} entries")
|
||||
|
||||
for r in results:
|
||||
x1, y1, x2, y2 = map(int, r['bbox'])
|
||||
|
||||
if r['match']:
|
||||
color = (0, 255, 0)
|
||||
label = r['match']
|
||||
else:
|
||||
color = (0, 0, 255)
|
||||
label = "Unknown"
|
||||
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
|
||||
cv2.putText(frame, label, (x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
|
||||
|
||||
cv2.imshow("Face Search", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
# Usage
|
||||
db = FaceDatabase()
|
||||
db.load("face_database.npz")
|
||||
realtime_search(db)
|
||||
store.save()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Stores Module](../modules/stores.md) - Full `FAISS` API reference
|
||||
- [Recognition Module](../modules/recognition.md) - Face recognition details
|
||||
- [Batch Processing](batch-processing.md) - Process multiple files
|
||||
- [Video & Webcam](video-webcam.md) - Real-time processing
|
||||
- [Concepts: Thresholds](../concepts/thresholds-calibration.md) - Tuning similarity thresholds
|
||||
|
||||
@@ -8,8 +8,10 @@ A complete pipeline for processing images with detection, recognition, and attri
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace, ArcFace, AgeGender
|
||||
from uniface.visualization import draw_detections
|
||||
from uniface.attribute import AgeGender
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.draw import draw_detections
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
@@ -32,7 +34,7 @@ def process_image(image_path):
|
||||
embedding = recognizer.get_normalized_embedding(image, face.landmarks)
|
||||
|
||||
# Step 3: Predict attributes
|
||||
attrs = age_gender.predict(image, face.bbox)
|
||||
attrs = age_gender.predict(image, face)
|
||||
|
||||
results.append({
|
||||
'face_id': i,
|
||||
@@ -46,12 +48,7 @@ def process_image(image_path):
|
||||
print(f" Face {i+1}: {attrs.sex}, {attrs.age} years old")
|
||||
|
||||
# Visualize
|
||||
draw_detections(
|
||||
image=image,
|
||||
bboxes=[f.bbox for f in faces],
|
||||
scores=[f.confidence for f in faces],
|
||||
landmarks=[f.landmarks for f in faces]
|
||||
)
|
||||
draw_detections(image=image, faces=faces)
|
||||
|
||||
return image, results
|
||||
|
||||
@@ -67,14 +64,21 @@ cv2.imwrite("result.jpg", result_image)
|
||||
For convenience, use the built-in `FaceAnalyzer`:
|
||||
|
||||
```python
|
||||
from uniface import FaceAnalyzer
|
||||
from uniface.analyzer import FaceAnalyzer
|
||||
from uniface.attribute import AgeGender
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
import cv2
|
||||
|
||||
# Initialize with desired modules
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
age_gender = AgeGender()
|
||||
|
||||
analyzer = FaceAnalyzer(
|
||||
detect=True,
|
||||
recognize=True,
|
||||
attributes=True
|
||||
detector,
|
||||
recognizer=recognizer,
|
||||
attributes=[age_gender],
|
||||
)
|
||||
|
||||
# Process image
|
||||
@@ -97,13 +101,15 @@ Complete pipeline with all modules:
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface import (
|
||||
RetinaFace, ArcFace, AgeGender, FairFace,
|
||||
Landmark106, MobileGaze
|
||||
)
|
||||
from uniface.attribute import AgeGender, FairFace
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.gaze import MobileGaze
|
||||
from uniface.headpose import HeadPose
|
||||
from uniface.landmark import Landmark106
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.spoofing import MiniFASNet
|
||||
from uniface.visualization import draw_detections, draw_gaze
|
||||
from uniface.draw import draw_detections, draw_gaze, draw_head_pose
|
||||
|
||||
class FaceAnalysisPipeline:
|
||||
def __init__(self):
|
||||
@@ -114,6 +120,7 @@ class FaceAnalysisPipeline:
|
||||
self.fairface = FairFace()
|
||||
self.landmarker = Landmark106()
|
||||
self.gaze = MobileGaze()
|
||||
self.head_pose = HeadPose()
|
||||
self.parser = BiSeNet()
|
||||
self.spoofer = MiniFASNet()
|
||||
|
||||
@@ -135,12 +142,12 @@ class FaceAnalysisPipeline:
|
||||
)
|
||||
|
||||
# Attributes
|
||||
ag_result = self.age_gender.predict(image, face.bbox)
|
||||
ag_result = self.age_gender.predict(image, face)
|
||||
result['age'] = ag_result.age
|
||||
result['gender'] = ag_result.sex
|
||||
|
||||
# FairFace attributes
|
||||
ff_result = self.fairface.predict(image, face.bbox)
|
||||
ff_result = self.fairface.predict(image, face)
|
||||
result['age_group'] = ff_result.age_group
|
||||
result['race'] = ff_result.race
|
||||
|
||||
@@ -157,6 +164,13 @@ class FaceAnalysisPipeline:
|
||||
result['gaze_pitch'] = gaze_result.pitch
|
||||
result['gaze_yaw'] = gaze_result.yaw
|
||||
|
||||
# Head pose estimation
|
||||
if face_crop.size > 0:
|
||||
hp_result = self.head_pose.estimate(face_crop)
|
||||
result['head_pitch'] = hp_result.pitch
|
||||
result['head_yaw'] = hp_result.yaw
|
||||
result['head_roll'] = hp_result.roll
|
||||
|
||||
# Face parsing
|
||||
if face_crop.size > 0:
|
||||
result['parsing_mask'] = self.parser.parse(face_crop)
|
||||
@@ -179,6 +193,7 @@ for i, r in enumerate(results):
|
||||
print(f" Gender: {r['gender']}, Age: {r['age']}")
|
||||
print(f" Race: {r['race']}, Age Group: {r['age_group']}")
|
||||
print(f" Gaze: pitch={np.degrees(r['gaze_pitch']):.1f}°")
|
||||
print(f" Head Pose: P={r['head_pitch']:.1f}° Y={r['head_yaw']:.1f}° R={r['head_roll']:.1f}°")
|
||||
print(f" Real: {r['is_real']} ({r['spoof_confidence']:.1%})")
|
||||
```
|
||||
|
||||
@@ -189,8 +204,10 @@ for i, r in enumerate(results):
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, AgeGender, MobileGaze
|
||||
from uniface.visualization import draw_detections, draw_gaze
|
||||
from uniface.attribute import AgeGender
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.gaze import MobileGaze
|
||||
from uniface.draw import draw_detections, draw_gaze
|
||||
|
||||
def visualize_analysis(image_path, output_path):
|
||||
"""Create annotated visualization of face analysis."""
|
||||
@@ -208,7 +225,7 @@ def visualize_analysis(image_path, output_path):
|
||||
cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
# Age and gender
|
||||
attrs = age_gender.predict(image, face.bbox)
|
||||
attrs = age_gender.predict(image, face)
|
||||
label = f"{attrs.sex}, {attrs.age}y"
|
||||
cv2.putText(image, label, (x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
|
||||
@@ -256,6 +273,11 @@ def results_to_json(results):
|
||||
'gaze': {
|
||||
'pitch_deg': float(np.degrees(r['gaze_pitch'])) if 'gaze_pitch' in r else None,
|
||||
'yaw_deg': float(np.degrees(r['gaze_yaw'])) if 'gaze_yaw' in r else None
|
||||
},
|
||||
'head_pose': {
|
||||
'pitch': float(r['head_pitch']) if 'head_pitch' in r else None,
|
||||
'yaw': float(r['head_yaw']) if 'head_yaw' in r else None,
|
||||
'roll': float(r['head_roll']) if 'head_roll' in r else None
|
||||
}
|
||||
}
|
||||
output.append(item)
|
||||
@@ -279,3 +301,4 @@ with open('results.json', 'w') as f:
|
||||
- [Face Search](face-search.md) - Build a search system
|
||||
- [Detection Module](../modules/detection.md) - Detection options
|
||||
- [Recognition Module](../modules/recognition.md) - Recognition details
|
||||
- [Head Pose Module](../modules/headpose.md) - Head orientation estimation
|
||||
|
||||
@@ -11,8 +11,8 @@ Real-time face analysis for video streams.
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.draw import draw_detections
|
||||
|
||||
detector = RetinaFace()
|
||||
cap = cv2.VideoCapture(0)
|
||||
@@ -26,12 +26,7 @@ while True:
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
draw_detections(
|
||||
image=frame,
|
||||
bboxes=[f.bbox for f in faces],
|
||||
scores=[f.confidence for f in faces],
|
||||
landmarks=[f.landmarks for f in faces]
|
||||
)
|
||||
draw_detections(image=frame, faces=faces)
|
||||
|
||||
cv2.imshow("Face Detection", frame)
|
||||
|
||||
@@ -48,7 +43,7 @@ cv2.destroyAllWindows()
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
def process_video(input_path, output_path):
|
||||
"""Process a video file."""
|
||||
@@ -83,6 +78,57 @@ process_video("input.mp4", "output.mp4")
|
||||
|
||||
---
|
||||
|
||||
## Webcam Tracking
|
||||
|
||||
To track faces across frames with persistent IDs, pair a detector with `BYTETracker`:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface.common import xyxy_to_cxcywh
|
||||
from uniface.detection import SCRFD
|
||||
from uniface.tracking import BYTETracker
|
||||
from uniface.draw import draw_tracks
|
||||
|
||||
detector = SCRFD()
|
||||
tracker = BYTETracker(track_thresh=0.5, track_buffer=30)
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
dets = np.array([[*f.bbox, f.confidence] for f in faces])
|
||||
dets = dets if len(dets) > 0 else np.empty((0, 5))
|
||||
|
||||
tracks = tracker.update(dets)
|
||||
|
||||
if len(tracks) > 0 and len(faces) > 0:
|
||||
face_bboxes = np.array([f.bbox for f in faces], dtype=np.float32)
|
||||
track_ids = tracks[:, 4].astype(int)
|
||||
|
||||
face_centers = xyxy_to_cxcywh(face_bboxes)[:, :2]
|
||||
track_centers = xyxy_to_cxcywh(tracks[:, :4])[:, :2]
|
||||
|
||||
for ti in range(len(tracks)):
|
||||
dists = (track_centers[ti, 0] - face_centers[:, 0]) ** 2 + (track_centers[ti, 1] - face_centers[:, 1]) ** 2
|
||||
faces[int(np.argmin(dists))].track_id = track_ids[ti]
|
||||
|
||||
draw_tracks(image=frame, faces=[f for f in faces if f.track_id is not None])
|
||||
cv2.imshow("Face Tracking", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
For more details on tracker parameters and tuning, see [Tracking](../modules/tracking.md).
|
||||
|
||||
---
|
||||
|
||||
## Performance Tips
|
||||
|
||||
### Skip Frames
|
||||
@@ -119,7 +165,9 @@ while True:
|
||||
|
||||
## See Also
|
||||
|
||||
- [Tracking Module](../modules/tracking.md) - Face tracking with BYTETracker
|
||||
- [Anonymize Stream](anonymize-stream.md) - Privacy protection in video
|
||||
- [Batch Processing](batch-processing.md) - Process multiple files
|
||||
- [Detection Module](../modules/detection.md) - Detection options
|
||||
- [Gaze Module](../modules/gaze.md) - Gaze tracking
|
||||
- [Gaze Module](../modules/gaze.md) - Gaze estimation
|
||||
- [Head Pose Module](../modules/headpose.md) - Head orientation estimation
|
||||
|
||||
417
examples/09_face_segmentation.ipynb
Normal file
@@ -0,0 +1,417 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# XSeg Face Segmentation\n",
|
||||
"\n",
|
||||
"<div style=\"display:flex; flex-wrap:wrap; align-items:center;\">\n",
|
||||
" <a style=\"margin-right:10px; margin-bottom:6px;\" href=\"https://pepy.tech/projects/uniface\"><img alt=\"PyPI Downloads\" src=\"https://static.pepy.tech/personalized-badge/uniface?period=total&units=international_system&left_color=grey&right_color=blue&left_text=Downloads\"></a>\n",
|
||||
" <a style=\"margin-right:10px; margin-bottom:6px;\" href=\"https://pypi.org/project/uniface/\"><img alt=\"PyPI Version\" src=\"https://img.shields.io/pypi/v/uniface.svg\"></a>\n",
|
||||
" <a style=\"margin-right:10px; margin-bottom:6px;\" href=\"https://opensource.org/licenses/MIT\"><img alt=\"License\" src=\"https://img.shields.io/badge/License-MIT-blue.svg\"></a>\n",
|
||||
" <a style=\"margin-bottom:6px;\" href=\"https://github.com/yakhyo/uniface\"><img alt=\"GitHub Stars\" src=\"https://img.shields.io/github/stars/yakhyo/uniface.svg?style=social\"></a>\n",
|
||||
"</div>\n",
|
||||
"\n",
|
||||
"**UniFace** is a lightweight, production-ready Python library for face detection, recognition, tracking, landmark analysis, face parsing, gaze estimation, and face attributes.\n",
|
||||
"\n",
|
||||
"🔗 **GitHub**: [github.com/yakhyo/uniface](https://github.com/yakhyo/uniface) | 📚 **Docs**: [yakhyo.github.io/uniface](https://yakhyo.github.io/uniface)\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"This notebook demonstrates face segmentation using the **XSeg** model from DeepFaceLab.\n",
|
||||
"\n",
|
||||
"XSeg outputs a mask for face regions. Unlike BiSeNet which works on bbox crops, XSeg requires 5-point landmarks for face alignment.\n",
|
||||
"\n",
|
||||
"## 1. Install UniFace"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -q \"uniface[cpu]\"\n",
|
||||
"\n",
|
||||
"# Clone repo for assets (Colab only)\n",
|
||||
"import os\n",
|
||||
"if 'COLAB_GPU' in os.environ or 'COLAB_RELEASE_TAG' in os.environ:\n",
|
||||
" if not os.path.exists('uniface'):\n",
|
||||
" !git clone --depth 1 https://github.com/yakhyo/uniface.git\n",
|
||||
" os.chdir('uniface/examples')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Import Libraries"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import cv2\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"import uniface\n",
|
||||
"from uniface.detection import RetinaFace\n",
|
||||
"from uniface.parsing import XSeg\n",
|
||||
"\n",
|
||||
"print(f\"UniFace version: {uniface.__version__}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3. Initialize Models\n",
|
||||
"\n",
|
||||
"XSeg requires face detection with landmarks. We use RetinaFace for detection and XSeg for segmentation."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Initialize detector and parser\n",
|
||||
"detector = RetinaFace()\n",
|
||||
"parser = XSeg()\n",
|
||||
"\n",
|
||||
"print(f\"XSeg input size: {parser.input_size}\")\n",
|
||||
"print(f\"Align size: {parser.align_size}\")\n",
|
||||
"print(f\"Blur sigma: {parser.blur_sigma}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 4. Helper Functions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def apply_mask_overlay(image, mask, color=(0, 255, 0), alpha=0.5):\n",
|
||||
" \"\"\"Apply colored mask overlay on image.\"\"\"\n",
|
||||
" overlay = image.copy().astype(np.float32)\n",
|
||||
"\n",
|
||||
" # Create colored overlay where mask is positive\n",
|
||||
" color_overlay = np.zeros_like(image, dtype=np.float32)\n",
|
||||
" color_overlay[:] = color\n",
|
||||
"\n",
|
||||
" mask_3ch = mask[..., np.newaxis]\n",
|
||||
" overlay = overlay * (1 - mask_3ch * alpha) + color_overlay * mask_3ch * alpha\n",
|
||||
"\n",
|
||||
" return overlay.clip(0, 255).astype(np.uint8)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def show_results(original, mask, result, title=\"XSeg Result\"):\n",
|
||||
" \"\"\"Display original, mask, and result side by side.\"\"\"\n",
|
||||
" fig, axes = plt.subplots(1, 3, figsize=(15, 5))\n",
|
||||
"\n",
|
||||
" axes[0].imshow(cv2.cvtColor(original, cv2.COLOR_BGR2RGB))\n",
|
||||
" axes[0].set_title(\"Original\")\n",
|
||||
" axes[0].axis(\"off\")\n",
|
||||
"\n",
|
||||
" axes[1].imshow(mask, cmap=\"gray\")\n",
|
||||
" axes[1].set_title(\"Mask\")\n",
|
||||
" axes[1].axis(\"off\")\n",
|
||||
"\n",
|
||||
" axes[2].imshow(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))\n",
|
||||
" axes[2].set_title(\"Overlay\")\n",
|
||||
" axes[2].axis(\"off\")\n",
|
||||
"\n",
|
||||
" plt.suptitle(title)\n",
|
||||
" plt.tight_layout()\n",
|
||||
" plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 5. Process Single Image"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load image\n",
|
||||
"image_path = \"../assets/einstien.png\"\n",
|
||||
"image = cv2.imread(image_path)\n",
|
||||
"print(f\"Image shape: {image.shape}\")\n",
|
||||
"\n",
|
||||
"# Detect faces\n",
|
||||
"faces = detector.detect(image)\n",
|
||||
"print(f\"Detected {len(faces)} face(s)\")\n",
|
||||
"\n",
|
||||
"# Parse first face\n",
|
||||
"if len(faces) > 0 and faces[0].landmarks is not None:\n",
|
||||
" face = faces[0]\n",
|
||||
" mask = parser.parse(image, landmarks=face.landmarks)\n",
|
||||
"\n",
|
||||
" print(f\"Mask shape: {mask.shape}\")\n",
|
||||
" print(f\"Mask range: [{mask.min():.3f}, {mask.max():.3f}]\")\n",
|
||||
"\n",
|
||||
" # Visualize\n",
|
||||
" result = apply_mask_overlay(image, mask)\n",
|
||||
" show_results(image, mask, result, \"Single Face Segmentation\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 6. Configurable Parameters\n",
|
||||
"\n",
|
||||
"XSeg has two main parameters:\n",
|
||||
"- `align_size`: Face alignment output size (default: 256)\n",
|
||||
"- `blur_sigma`: Gaussian blur for mask smoothing (default: 0 = raw output)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load image\n",
|
||||
"image_path = \"../assets/einstien.png\"\n",
|
||||
"image = cv2.imread(image_path)\n",
|
||||
"\n",
|
||||
"# Detect face\n",
|
||||
"faces = detector.detect(image)\n",
|
||||
"landmarks = faces[0].landmarks\n",
|
||||
"\n",
|
||||
"# Compare different blur settings\n",
|
||||
"blur_values = [0, 3, 5]\n",
|
||||
"\n",
|
||||
"fig, axes = plt.subplots(1, len(blur_values), figsize=(15, 5))\n",
|
||||
"\n",
|
||||
"for i, blur in enumerate(blur_values):\n",
|
||||
" parser_test = XSeg(blur_sigma=blur)\n",
|
||||
" mask = parser_test.parse(image, landmarks=landmarks)\n",
|
||||
"\n",
|
||||
" axes[i].imshow(mask, cmap=\"gray\")\n",
|
||||
" axes[i].set_title(f\"blur_sigma={blur}\")\n",
|
||||
" axes[i].axis(\"off\")\n",
|
||||
"\n",
|
||||
"plt.suptitle(\"Effect of blur_sigma\")\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 7. Using parse_aligned\n",
|
||||
"\n",
|
||||
"If you already have aligned face crops, use `parse_aligned()` directly."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from uniface.face_utils import face_alignment\n",
|
||||
"\n",
|
||||
"# Load and detect\n",
|
||||
"image = cv2.imread(\"../assets/einstien.png\")\n",
|
||||
"faces = detector.detect(image)\n",
|
||||
"landmarks = faces[0].landmarks\n",
|
||||
"\n",
|
||||
"# Align face manually\n",
|
||||
"aligned_face, inverse_matrix = face_alignment(image, landmarks, image_size=256)\n",
|
||||
"print(f\"Aligned face shape: {aligned_face.shape}\")\n",
|
||||
"\n",
|
||||
"# Parse aligned crop directly\n",
|
||||
"mask = parser.parse_aligned(aligned_face)\n",
|
||||
"print(f\"Mask shape: {mask.shape}\")\n",
|
||||
"\n",
|
||||
"# Visualize\n",
|
||||
"result = apply_mask_overlay(aligned_face, mask)\n",
|
||||
"\n",
|
||||
"fig, axes = plt.subplots(1, 3, figsize=(12, 4))\n",
|
||||
"axes[0].imshow(cv2.cvtColor(aligned_face, cv2.COLOR_BGR2RGB))\n",
|
||||
"axes[0].set_title(\"Aligned Face\")\n",
|
||||
"axes[0].axis(\"off\")\n",
|
||||
"\n",
|
||||
"axes[1].imshow(mask, cmap=\"gray\")\n",
|
||||
"axes[1].set_title(\"Mask\")\n",
|
||||
"axes[1].axis(\"off\")\n",
|
||||
"\n",
|
||||
"axes[2].imshow(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))\n",
|
||||
"axes[2].set_title(\"Overlay\")\n",
|
||||
"axes[2].axis(\"off\")\n",
|
||||
"\n",
|
||||
"plt.suptitle(\"parse_aligned() on pre-aligned crop\")\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 8. XSeg vs BiSeNet\n",
|
||||
"\n",
|
||||
"| Feature | XSeg | BiSeNet |\n",
|
||||
"|---------|------|--------|\n",
|
||||
"| Output | Mask [0, 1] | 19 class labels |\n",
|
||||
"| Input | Requires landmarks | Works on bbox crops |\n",
|
||||
"| Use case | Face region extraction | Facial component parsing |\n",
|
||||
"| Origin | DeepFaceLab | CelebAMask-HQ |"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from uniface.parsing import BiSeNet\n",
|
||||
"from uniface.draw import vis_parsing_maps\n",
|
||||
"\n",
|
||||
"# Load image and detect\n",
|
||||
"image = cv2.imread(\"../assets/einstien.png\")\n",
|
||||
"faces = detector.detect(image)\n",
|
||||
"face = faces[0]\n",
|
||||
"\n",
|
||||
"# XSeg: requires landmarks\n",
|
||||
"xseg_mask = parser.parse(image, landmarks=face.landmarks)\n",
|
||||
"\n",
|
||||
"# BiSeNet: works on bbox crop\n",
|
||||
"bisenet = BiSeNet()\n",
|
||||
"x1, y1, x2, y2 = map(int, face.bbox[:4])\n",
|
||||
"face_crop = image[y1:y2, x1:x2]\n",
|
||||
"bisenet_mask = bisenet.parse(face_crop)\n",
|
||||
"\n",
|
||||
"# Visualize comparison\n",
|
||||
"fig, axes = plt.subplots(1, 3, figsize=(15, 5))\n",
|
||||
"\n",
|
||||
"axes[0].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))\n",
|
||||
"axes[0].set_title(\"Original\")\n",
|
||||
"axes[0].axis(\"off\")\n",
|
||||
"\n",
|
||||
"axes[1].imshow(xseg_mask, cmap=\"gray\")\n",
|
||||
"axes[1].set_title(\"XSeg\")\n",
|
||||
"axes[1].axis(\"off\")\n",
|
||||
"\n",
|
||||
"face_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)\n",
|
||||
"bisenet_vis = vis_parsing_maps(face_rgb, bisenet_mask, save_image=False)\n",
|
||||
"axes[2].imshow(bisenet_vis)\n",
|
||||
"axes[2].set_title(\"BiSeNet (19 classes)\")\n",
|
||||
"axes[2].axis(\"off\")\n",
|
||||
"\n",
|
||||
"plt.suptitle(\"XSeg vs BiSeNet\")\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 9. Application: Face Masking\n",
|
||||
"\n",
|
||||
"Use XSeg mask to extract or replace face regions."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load image\n",
|
||||
"image = cv2.imread(\"../assets/einstien.png\")\n",
|
||||
"faces = detector.detect(image)\n",
|
||||
"mask = parser.parse(image, landmarks=faces[0].landmarks)\n",
|
||||
"\n",
|
||||
"# Extract face only\n",
|
||||
"mask_3ch = np.stack([mask] * 3, axis=-1)\n",
|
||||
"face_only = (image * mask_3ch).astype(np.uint8)\n",
|
||||
"\n",
|
||||
"# Replace background with white\n",
|
||||
"white_bg = np.ones_like(image) * 255\n",
|
||||
"face_on_white = (image * mask_3ch + white_bg * (1 - mask_3ch)).astype(np.uint8)\n",
|
||||
"\n",
|
||||
"# Visualize\n",
|
||||
"fig, axes = plt.subplots(1, 3, figsize=(15, 5))\n",
|
||||
"\n",
|
||||
"axes[0].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))\n",
|
||||
"axes[0].set_title(\"Original\")\n",
|
||||
"axes[0].axis(\"off\")\n",
|
||||
"\n",
|
||||
"axes[1].imshow(cv2.cvtColor(face_only, cv2.COLOR_BGR2RGB))\n",
|
||||
"axes[1].set_title(\"Face Extracted\")\n",
|
||||
"axes[1].axis(\"off\")\n",
|
||||
"\n",
|
||||
"axes[2].imshow(cv2.cvtColor(face_on_white, cv2.COLOR_BGR2RGB))\n",
|
||||
"axes[2].set_title(\"White Background\")\n",
|
||||
"axes[2].axis(\"off\")\n",
|
||||
"\n",
|
||||
"plt.suptitle(\"Face Masking Applications\")\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Summary\n",
|
||||
"\n",
|
||||
"XSeg provides face segmentation using landmark-based alignment:\n",
|
||||
"\n",
|
||||
"- **`parse(image, landmarks=landmarks)`** - Full pipeline: align, segment, warp back\n",
|
||||
"- **`parse_aligned(face_crop)`** - For pre-aligned crops\n",
|
||||
"- **`parse_with_inverse(image, landmarks)`** - Returns mask + crop + inverse matrix\n",
|
||||
"\n",
|
||||
"Parameters:\n",
|
||||
"- `align_size` - Face alignment size (default: 256)\n",
|
||||
"- `blur_sigma` - Mask smoothing (default: 0 = raw)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "base",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
291
examples/10_face_vector_store.ipynb
Normal file
@@ -0,0 +1,291 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Face Vector Store with FAISS\n",
|
||||
"\n",
|
||||
"<div style=\"display:flex; flex-wrap:wrap; align-items:center;\">\n",
|
||||
" <a style=\"margin-right:10px; margin-bottom:6px;\" href=\"https://pepy.tech/projects/uniface\"><img alt=\"PyPI Downloads\" src=\"https://static.pepy.tech/personalized-badge/uniface?period=total&units=international_system&left_color=grey&right_color=blue&left_text=Downloads\"></a>\n",
|
||||
" <a style=\"margin-right:10px; margin-bottom:6px;\" href=\"https://pypi.org/project/uniface/\"><img alt=\"PyPI Version\" src=\"https://img.shields.io/pypi/v/uniface.svg\"></a>\n",
|
||||
" <a style=\"margin-right:10px; margin-bottom:6px;\" href=\"https://opensource.org/licenses/MIT\"><img alt=\"License\" src=\"https://img.shields.io/badge/License-MIT-blue.svg\"></a>\n",
|
||||
" <a style=\"margin-bottom:6px;\" href=\"https://github.com/yakhyo/uniface\"><img alt=\"GitHub Stars\" src=\"https://img.shields.io/github/stars/yakhyo/uniface.svg?style=social\"></a>\n",
|
||||
"</div>\n",
|
||||
"\n",
|
||||
"**UniFace** is a lightweight, production-ready Python library for face detection, recognition, tracking, landmark analysis, face parsing, gaze estimation, and face attributes.\n",
|
||||
"\n",
|
||||
"🔗 **GitHub**: [github.com/yakhyo/uniface](https://github.com/yakhyo/uniface) | 📚 **Docs**: [yakhyo.github.io/uniface](https://yakhyo.github.io/uniface)\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"This notebook demonstrates how to build a persistent face database using the **FAISS** vector store in UniFace.\n",
|
||||
"\n",
|
||||
"Unlike direct pairwise comparison (see `04_face_search`), a vector store lets you efficiently index\n",
|
||||
"thousands of face embeddings and retrieve the closest match in sub-millisecond time.\n",
|
||||
"\n",
|
||||
"## 1. Install UniFace"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -q \"uniface[cpu]\" faiss-cpu\n",
|
||||
"\n",
|
||||
"# Clone repo for assets (Colab only)\n",
|
||||
"import os\n",
|
||||
"if 'COLAB_GPU' in os.environ or 'COLAB_RELEASE_TAG' in os.environ:\n",
|
||||
" if not os.path.exists('uniface'):\n",
|
||||
" !git clone --depth 1 https://github.com/yakhyo/uniface.git\n",
|
||||
" os.chdir('uniface/examples')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Import Libraries"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import cv2\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import shutil\n",
|
||||
"\n",
|
||||
"import uniface\n",
|
||||
"from uniface.analyzer import FaceAnalyzer\n",
|
||||
"from uniface.detection import RetinaFace\n",
|
||||
"from uniface.recognition import ArcFace\n",
|
||||
"from uniface.stores import FAISS\n",
|
||||
"\n",
|
||||
"print(f'UniFace version: {uniface.__version__}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3. Initialize Models and Vector Store"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"analyzer = FaceAnalyzer(\n",
|
||||
" detector=RetinaFace(confidence_threshold=0.5),\n",
|
||||
" recognizer=ArcFace(),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"DB_PATH = './demo_face_index'\n",
|
||||
"store = FAISS(embedding_size=512, db_path=DB_PATH)\n",
|
||||
"print(store)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 4. Enroll Faces into the Store\n",
|
||||
"\n",
|
||||
"We detect faces in the test images and add each embedding with metadata."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"enrollment_images = {\n",
|
||||
" '../assets/test_images/image0.jpg': 'person_0',\n",
|
||||
" '../assets/test_images/image1.jpg': 'person_1',\n",
|
||||
" '../assets/test_images/image2.jpg': 'person_2',\n",
|
||||
" '../assets/test_images/image3.jpg': 'person_3',\n",
|
||||
" '../assets/test_images/image4.jpg': 'person_4',\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"for path, label in enrollment_images.items():\n",
|
||||
" image = cv2.imread(path)\n",
|
||||
" faces = analyzer.analyze(image)\n",
|
||||
" if faces:\n",
|
||||
" store.add(\n",
|
||||
" embedding=faces[0].embedding,\n",
|
||||
" metadata={'label': label, 'source': path},\n",
|
||||
" )\n",
|
||||
" print(f'Enrolled {label} from {path}')\n",
|
||||
"\n",
|
||||
"print(f'\\nStore size: {store.size} vectors')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 5. Search the Store\n",
|
||||
"\n",
|
||||
"Use a query image to find the closest match in the database."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query_image = cv2.imread('../assets/test_images/image0.jpg')\n",
|
||||
"query_faces = analyzer.analyze(query_image)\n",
|
||||
"\n",
|
||||
"if query_faces:\n",
|
||||
" result, similarity = store.search(query_faces[0].embedding, threshold=0.4)\n",
|
||||
"\n",
|
||||
" if result:\n",
|
||||
" print(f'Match found: {result[\"label\"]} (similarity: {similarity:.4f})')\n",
|
||||
" print(f'Source: {result[\"source\"]}')\n",
|
||||
" else:\n",
|
||||
" print(f'No match above threshold (best similarity: {similarity:.4f})')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"if query_faces and result:\n",
|
||||
" matched_image = cv2.imread(result['source'])\n",
|
||||
"\n",
|
||||
" fig, axes = plt.subplots(1, 2, figsize=(10, 4))\n",
|
||||
" axes[0].imshow(cv2.cvtColor(query_image, cv2.COLOR_BGR2RGB))\n",
|
||||
" axes[0].set_title('Query', fontsize=12)\n",
|
||||
" axes[1].imshow(cv2.cvtColor(matched_image, cv2.COLOR_BGR2RGB))\n",
|
||||
" axes[1].set_title(f'Match: {result[\"label\"]} ({similarity:.3f})', fontsize=12)\n",
|
||||
" for ax in axes:\n",
|
||||
" ax.axis('off')\n",
|
||||
" plt.tight_layout()\n",
|
||||
" plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 6. Save and Reload the Index\n",
|
||||
"\n",
|
||||
"The index and metadata can be persisted to disk and loaded later."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"store.save()\n",
|
||||
"\n",
|
||||
"# Create a fresh store and load the saved data\n",
|
||||
"store_reloaded = FAISS(embedding_size=512, db_path=DB_PATH)\n",
|
||||
"loaded = store_reloaded.load()\n",
|
||||
"print(f'Load successful: {loaded}')\n",
|
||||
"print(f'Reloaded store size: {store_reloaded.size} vectors')\n",
|
||||
"\n",
|
||||
"# Verify search still works after reload\n",
|
||||
"if query_faces:\n",
|
||||
" result, similarity = store_reloaded.search(query_faces[0].embedding, threshold=0.4)\n",
|
||||
" if result:\n",
|
||||
" print(f'Search after reload: {result[\"label\"]} ({similarity:.4f})')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 7. Remove Entries\n",
|
||||
"\n",
|
||||
"Remove all entries matching a metadata key-value pair."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(f'Before removal: {store.size} vectors')\n",
|
||||
"\n",
|
||||
"removed = store.remove(key='label', value='person_0')\n",
|
||||
"print(f'Removed {removed} entry')\n",
|
||||
"print(f'After removal: {store.size} vectors')\n",
|
||||
"\n",
|
||||
"# Searching for the removed person should now return a different (lower) match\n",
|
||||
"if query_faces:\n",
|
||||
" result, similarity = store.search(query_faces[0].embedding, threshold=0.4)\n",
|
||||
" if result:\n",
|
||||
" print(f'\\nClosest remaining match: {result[\"label\"]} ({similarity:.4f})')\n",
|
||||
" else:\n",
|
||||
" print(f'\\nNo match above threshold (best similarity: {similarity:.4f})')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 8. Cleanup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"shutil.rmtree(DB_PATH, ignore_errors=True)\n",
|
||||
"print('Cleaned up demo index.')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Notes\n",
|
||||
"\n",
|
||||
"- Embeddings **must** be L2-normalised before adding (ArcFace already produces normalised embeddings)\n",
|
||||
"- The default threshold of `0.4` works for most cases; raise it for stricter matching\n",
|
||||
"- `save()` / `load()` persist the FAISS index and metadata as files in `db_path`\n",
|
||||
"- For GPU-accelerated search install `faiss-gpu` instead of `faiss-cpu`\n",
|
||||
"- The store uses `IndexFlatIP` (inner product = cosine similarity for normalised vectors)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "base",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
223
examples/11_head_pose_estimation.ipynb
Normal file
@@ -0,0 +1,223 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Head Pose Estimation with UniFace\n",
|
||||
"\n",
|
||||
"<div style=\"display:flex; flex-wrap:wrap; align-items:center;\">\n",
|
||||
" <a style=\"margin-right:10px; margin-bottom:6px;\" href=\"https://pepy.tech/projects/uniface\"><img alt=\"PyPI Downloads\" src=\"https://static.pepy.tech/personalized-badge/uniface?period=total&units=international_system&left_color=grey&right_color=blue&left_text=Downloads\"></a>\n",
|
||||
" <a style=\"margin-right:10px; margin-bottom:6px;\" href=\"https://pypi.org/project/uniface/\"><img alt=\"PyPI Version\" src=\"https://img.shields.io/pypi/v/uniface.svg\"></a>\n",
|
||||
" <a style=\"margin-right:10px; margin-bottom:6px;\" href=\"https://opensource.org/licenses/MIT\"><img alt=\"License\" src=\"https://img.shields.io/badge/License-MIT-blue.svg\"></a>\n",
|
||||
" <a style=\"margin-bottom:6px;\" href=\"https://github.com/yakhyo/uniface\"><img alt=\"GitHub Stars\" src=\"https://img.shields.io/github/stars/yakhyo/uniface.svg?style=social\"></a>\n",
|
||||
"</div>\n",
|
||||
"\n",
|
||||
"**UniFace** is a lightweight, production-ready Python library for face detection, recognition, tracking, landmark analysis, face parsing, gaze estimation, and face attributes.\n",
|
||||
"\n",
|
||||
"🔗 **GitHub**: [github.com/yakhyo/uniface](https://github.com/yakhyo/uniface) | 📚 **Docs**: [yakhyo.github.io/uniface](https://yakhyo.github.io/uniface)\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"This notebook demonstrates head pose estimation using the **UniFace** library.\n",
|
||||
"\n",
|
||||
"## 1. Install UniFace"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -q \"uniface[cpu]\"\n",
|
||||
"\n",
|
||||
"# Clone repo for assets (Colab only)\n",
|
||||
"import os\n",
|
||||
"if 'COLAB_GPU' in os.environ or 'COLAB_RELEASE_TAG' in os.environ:\n",
|
||||
" if not os.path.exists('uniface'):\n",
|
||||
" !git clone --depth 1 https://github.com/yakhyo/uniface.git\n",
|
||||
" os.chdir('uniface/examples')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Import Libraries"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import cv2\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"from pathlib import Path\n",
|
||||
"from PIL import Image\n",
|
||||
"\n",
|
||||
"import uniface\n",
|
||||
"from uniface.detection import RetinaFace\n",
|
||||
"from uniface.headpose import HeadPose\n",
|
||||
"from uniface.draw import draw_head_pose\n",
|
||||
"\n",
|
||||
"print(f\"UniFace version: {uniface.__version__}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3. Initialize Models"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Initialize face detector\n",
|
||||
"detector = RetinaFace(confidence_threshold=0.5)\n",
|
||||
"\n",
|
||||
"# Initialize head pose estimator (default: ResNet18 backbone)\n",
|
||||
"head_pose = HeadPose()\n",
|
||||
"\n",
|
||||
"print(\"Models initialized successfully!\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 4. Process All Test Images\n",
|
||||
"\n",
|
||||
"Display original images in the first row and head-pose-annotated images in the second row."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get all test images\n",
|
||||
"test_images_dir = Path('../assets/test_images')\n",
|
||||
"test_images = sorted(test_images_dir.glob('*.jpg'))\n",
|
||||
"\n",
|
||||
"original_images = []\n",
|
||||
"annotated_images = []\n",
|
||||
"\n",
|
||||
"for img_path in test_images:\n",
|
||||
" image = cv2.imread(str(img_path))\n",
|
||||
" if image is None:\n",
|
||||
" continue\n",
|
||||
"\n",
|
||||
" # Store original (BGR -> RGB for display)\n",
|
||||
" original_images.append(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))\n",
|
||||
"\n",
|
||||
" # Detect faces and estimate head pose\n",
|
||||
" faces = detector.detect(image)\n",
|
||||
"\n",
|
||||
" for face in faces:\n",
|
||||
" x1, y1, x2, y2 = map(int, face.bbox)\n",
|
||||
" face_crop = image[y1:y2, x1:x2]\n",
|
||||
"\n",
|
||||
" if face_crop.size == 0:\n",
|
||||
" continue\n",
|
||||
"\n",
|
||||
" result = head_pose.estimate(face_crop)\n",
|
||||
" draw_head_pose(image, face.bbox, result.pitch, result.yaw, result.roll)\n",
|
||||
"\n",
|
||||
" print(f\"{img_path.name}: pitch={result.pitch:.1f}°, yaw={result.yaw:.1f}°, roll={result.roll:.1f}°\")\n",
|
||||
"\n",
|
||||
" annotated_images.append(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))\n",
|
||||
"\n",
|
||||
"print(f\"\\nProcessed {len(original_images)} images\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 5. Visualize Results\n",
|
||||
"\n",
|
||||
"**First row**: Original images \n",
|
||||
"**Second row**: Images with head pose 3D cube overlay"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"num_images = len(original_images)\n",
|
||||
"\n",
|
||||
"# Create figure with 2 rows\n",
|
||||
"fig, axes = plt.subplots(2, num_images, figsize=(5 * num_images, 10))\n",
|
||||
"\n",
|
||||
"if num_images == 1:\n",
|
||||
" axes = axes.reshape(2, 1)\n",
|
||||
"\n",
|
||||
"for i in range(num_images):\n",
|
||||
" axes[0, i].imshow(original_images[i])\n",
|
||||
" axes[0, i].set_title('Original', fontsize=12)\n",
|
||||
" axes[0, i].axis('off')\n",
|
||||
"\n",
|
||||
" axes[1, i].imshow(annotated_images[i])\n",
|
||||
" axes[1, i].set_title('Head Pose', fontsize=12)\n",
|
||||
" axes[1, i].axis('off')\n",
|
||||
"\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Notes\n",
|
||||
"\n",
|
||||
"- **Input**: Head pose estimation requires a face crop (obtained from face detection)\n",
|
||||
"- **Output**: `HeadPoseResult` with pitch, yaw, and roll angles in **degrees**\n",
|
||||
"- **Visualization**: Two modes available — `'cube'` (3D wireframe) and `'axis'` (X/Y/Z coordinate axes)\n",
|
||||
"- **Models**: 6 backbone variants available via `HeadPoseWeights` enum\n",
|
||||
"- **Method**: Uses 6D rotation representation converted to Euler angles\n",
|
||||
"\n",
|
||||
"### Available Backbones\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"from uniface.constants import HeadPoseWeights\n",
|
||||
"\n",
|
||||
"# Options: RESNET18, RESNET34, RESNET50, MOBILENET_V2, MOBILENET_V3_SMALL, MOBILENET_V3_LARGE\n",
|
||||
"head_pose = HeadPose(model_name=HeadPoseWeights.RESNET50)\n",
|
||||
"```"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "base",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
356
examples/12_face_recognition.ipynb
Normal file
@@ -0,0 +1,356 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Face Recognition: RetinaFace → Align → ArcFace\n",
|
||||
"\n",
|
||||
"<div style=\"display:flex; flex-wrap:wrap; align-items:center;\">\n",
|
||||
" <a style=\"margin-right:10px; margin-bottom:6px;\" href=\"https://pepy.tech/projects/uniface\"><img alt=\"PyPI Downloads\" src=\"https://static.pepy.tech/personalized-badge/uniface?period=total&units=international_system&left_color=grey&right_color=blue&left_text=Downloads\"></a>\n",
|
||||
" <a style=\"margin-right:10px; margin-bottom:6px;\" href=\"https://pypi.org/project/uniface/\"><img alt=\"PyPI Version\" src=\"https://img.shields.io/pypi/v/uniface.svg\"></a>\n",
|
||||
" <a style=\"margin-right:10px; margin-bottom:6px;\" href=\"https://opensource.org/licenses/MIT\"><img alt=\"License\" src=\"https://img.shields.io/badge/License-MIT-blue.svg\"></a>\n",
|
||||
" <a style=\"margin-bottom:6px;\" href=\"https://github.com/yakhyo/uniface\"><img alt=\"GitHub Stars\" src=\"https://img.shields.io/github/stars/yakhyo/uniface.svg?style=social\"></a>\n",
|
||||
"</div>\n",
|
||||
"\n",
|
||||
"**UniFace** is a lightweight, production-ready Python library for face detection, recognition, tracking, landmark analysis, face parsing, gaze estimation, and face attributes.\n",
|
||||
"\n",
|
||||
"🔗 **GitHub**: [github.com/yakhyo/uniface](https://github.com/yakhyo/uniface) | 📚 **Docs**: [yakhyo.github.io/uniface](https://yakhyo.github.io/uniface)\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"This notebook demonstrates face recognition **without** the high-level `FaceAnalyzer` wrapper. Each step is handled manually:\n",
|
||||
"\n",
|
||||
"1. **RetinaFace**: Detects faces and extracts 5-point landmarks.\n",
|
||||
"2. **Face Alignment**: Warps each face into a standardized 112x112 crop using the landmarks.\n",
|
||||
"3. **ArcFace**: Generates a 512-D L2-normalized embedding from the aligned crop.\n",
|
||||
"\n",
|
||||
"We compare three test images: `image0.jpg`, `image1.jpg`, and `image5.jpg`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 1. Install UniFace"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -q \"uniface[cpu]\"\n",
|
||||
"\n",
|
||||
"# Clone repo for assets (Colab only)\n",
|
||||
"import os\n",
|
||||
"if 'COLAB_GPU' in os.environ or 'COLAB_RELEASE_TAG' in os.environ:\n",
|
||||
" if not os.path.exists('uniface'):\n",
|
||||
" !git clone --depth 1 https://github.com/yakhyo/uniface.git\n",
|
||||
" os.chdir('uniface/examples')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Import Libraries"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import cv2\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import matplotlib.patches as patches\n",
|
||||
"\n",
|
||||
"import uniface\n",
|
||||
"from uniface.detection import RetinaFace\n",
|
||||
"from uniface.recognition import ArcFace\n",
|
||||
"from uniface.face_utils import face_alignment\n",
|
||||
"\n",
|
||||
"print(f\"UniFace version: {uniface.__version__}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3. Configuration"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"IMAGE_PATHS = {\n",
|
||||
" \"image0\": \"../assets/test_images/image0.jpg\",\n",
|
||||
" \"image1\": \"../assets/test_images/image1.jpg\",\n",
|
||||
" \"image5\": \"../assets/test_images/image5.jpg\",\n",
|
||||
"}\n",
|
||||
"THRESHOLD = 0.4 # Cosine similarity threshold for \"same person\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 4. Initialize Models"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"detector = RetinaFace(confidence_threshold=0.5)\n",
|
||||
"recognizer = ArcFace()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 5. Load Images & Detect Faces\n",
|
||||
"\n",
|
||||
"We use the detector to find faces and their landmarks in each image."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "10",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"images = {}\n",
|
||||
"faces = {}\n",
|
||||
"\n",
|
||||
"for name, path in IMAGE_PATHS.items():\n",
|
||||
" img = cv2.imread(path)\n",
|
||||
" if img is None:\n",
|
||||
" raise FileNotFoundError(f\"Cannot read: {path}\")\n",
|
||||
"\n",
|
||||
" detected = detector.detect(img)\n",
|
||||
" if not detected:\n",
|
||||
" raise RuntimeError(f\"No face detected in: {path}\")\n",
|
||||
"\n",
|
||||
" images[name] = img\n",
|
||||
" faces[name] = detected[0] # Keep highest-confidence face\n",
|
||||
" print(f\"{name:8s} | {len(detected)} face(s) detected | confidence={faces[name].confidence:.3f}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "11",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 6. Visualize Detections"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "12",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"LM_COLORS = [\"red\", \"blue\", \"green\", \"cyan\", \"magenta\"]\n",
|
||||
"\n",
|
||||
"fig, axes = plt.subplots(1, 3, figsize=(15, 5))\n",
|
||||
"fig.suptitle(\"Detected Faces & 5-Point Landmarks\", fontweight=\"bold\", fontsize=16)\n",
|
||||
"\n",
|
||||
"for ax, (name, img) in zip(axes, images.items()):\n",
|
||||
" face = faces[name]\n",
|
||||
" ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))\n",
|
||||
" ax.set_title(f\"{name}\\nconf={face.confidence:.3f}\", fontsize=12)\n",
|
||||
" ax.axis(\"off\")\n",
|
||||
"\n",
|
||||
" # Bounding box\n",
|
||||
" x1, y1, x2, y2 = face.bbox.astype(int)\n",
|
||||
" ax.add_patch(patches.Rectangle(\n",
|
||||
" (x1, y1), x2 - x1, y2 - y1,\n",
|
||||
" linewidth=2, edgecolor=\"lime\", facecolor=\"none\"))\n",
|
||||
"\n",
|
||||
" # Landmarks\n",
|
||||
" for (lx, ly), c in zip(face.landmarks, LM_COLORS):\n",
|
||||
" ax.plot(lx, ly, \"o\", color=c, markersize=6)\n",
|
||||
"\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "13",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 7. Face Alignment\n",
|
||||
"\n",
|
||||
"We warp the detected faces into a standardized 112x112 size. This improves recognition accuracy."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "14",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"aligned = {}\n",
|
||||
"\n",
|
||||
"for name, img in images.items():\n",
|
||||
" lm = faces[name].landmarks\n",
|
||||
" crop, _ = face_alignment(img, lm, image_size=(112, 112))\n",
|
||||
" aligned[name] = crop\n",
|
||||
"\n",
|
||||
"fig, axes = plt.subplots(1, 3, figsize=(12, 4))\n",
|
||||
"fig.suptitle(\"Aligned Face Crops (112x112)\", fontweight=\"bold\", fontsize=14)\n",
|
||||
"\n",
|
||||
"for ax, (name, crop) in zip(axes, aligned.items()):\n",
|
||||
" ax.imshow(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB))\n",
|
||||
" ax.set_title(name, fontsize=12)\n",
|
||||
" ax.axis(\"off\")\n",
|
||||
"\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "15",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 8. Extract Embeddings\n",
|
||||
"\n",
|
||||
"We pass the aligned crops to ArcFace to get the 512-D vectors."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "16",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embeddings = {}\n",
|
||||
"\n",
|
||||
"for name, crop in aligned.items():\n",
|
||||
" # landmarks=None because image is already aligned\n",
|
||||
" emb = recognizer.get_normalized_embedding(crop, landmarks=None)\n",
|
||||
" embeddings[name] = emb\n",
|
||||
" print(f\"{name:8s} | embedding shape={emb.shape} | L2-norm={np.linalg.norm(emb):.4f}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "17",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 9. Pairwise Cosine Similarity\n",
|
||||
"\n",
|
||||
"Since embeddings are normalized, cosine similarity is just the dot product."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "18",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"names = list(embeddings.keys())\n",
|
||||
"n = len(names)\n",
|
||||
"sim_matrix = np.zeros((n, n))\n",
|
||||
"\n",
|
||||
"for i, ni in enumerate(names):\n",
|
||||
" for j, nj in enumerate(names):\n",
|
||||
" # Use squeeze() to handle (1, 512) shapes if present\n",
|
||||
" sim_matrix[i, j] = float(np.dot(embeddings[ni].squeeze(), embeddings[nj].squeeze()))\n",
|
||||
"\n",
|
||||
"# Print comparison results\n",
|
||||
"pairs = [(names[i], names[j]) for i in range(n) for j in range(i + 1, n)]\n",
|
||||
"for a, b in pairs:\n",
|
||||
" s = float(np.dot(embeddings[a].squeeze(), embeddings[b].squeeze()))\n",
|
||||
" verdict = \"✓ Same person\" if s >= THRESHOLD else \"✗ Different people\"\n",
|
||||
" print(f\"{a} vs {b}: similarity={s:.4f} → {verdict}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "19",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 10. Similarity Heatmap"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "20",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fig, ax = plt.subplots(figsize=(8, 6))\n",
|
||||
"im = ax.imshow(sim_matrix, vmin=0, vmax=1, cmap=\"viridis\")\n",
|
||||
"plt.colorbar(im, ax=ax, label=\"Cosine similarity\")\n",
|
||||
"\n",
|
||||
"ax.set_xticks(range(n))\n",
|
||||
"ax.set_yticks(range(n))\n",
|
||||
"ax.set_xticklabels(names, rotation=30, ha=\"right\")\n",
|
||||
"ax.set_yticklabels(names)\n",
|
||||
"ax.set_title(\"Pairwise Face Similarity (ArcFace)\", fontweight=\"bold\")\n",
|
||||
"\n",
|
||||
"for i in range(n):\n",
|
||||
" for j in range(n):\n",
|
||||
" val = sim_matrix[i, j]\n",
|
||||
" ax.text(j, i, f\"{val:.2f}\",\n",
|
||||
" ha=\"center\", va=\"center\",\n",
|
||||
" color=\"black\" if val >= 0.6 else \"white\",\n",
|
||||
" fontsize=12, fontweight=\"bold\")\n",
|
||||
"\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "base",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
265
examples/13_portrait_matting.ipynb
Normal file
@@ -0,0 +1,265 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Portrait Matting with MODNet\n",
|
||||
"\n",
|
||||
"<div style=\"display:flex; flex-wrap:wrap; align-items:center;\">\n",
|
||||
" <a style=\"margin-right:10px; margin-bottom:6px;\" href=\"https://pepy.tech/projects/uniface\"><img alt=\"PyPI Downloads\" src=\"https://static.pepy.tech/personalized-badge/uniface?period=total&units=international_system&left_color=grey&right_color=blue&left_text=Downloads\"></a>\n",
|
||||
" <a style=\"margin-right:10px; margin-bottom:6px;\" href=\"https://pypi.org/project/uniface/\"><img alt=\"PyPI Version\" src=\"https://img.shields.io/pypi/v/uniface.svg\"></a>\n",
|
||||
" <a style=\"margin-right:10px; margin-bottom:6px;\" href=\"https://opensource.org/licenses/MIT\"><img alt=\"License\" src=\"https://img.shields.io/badge/License-MIT-blue.svg\"></a>\n",
|
||||
" <a style=\"margin-bottom:6px;\" href=\"https://github.com/yakhyo/uniface\"><img alt=\"GitHub Stars\" src=\"https://img.shields.io/github/stars/yakhyo/uniface.svg?style=social\"></a>\n",
|
||||
"</div>\n",
|
||||
"\n",
|
||||
"**UniFace** is a lightweight, production-ready Python library for face detection, recognition, tracking, landmark analysis, face parsing, gaze estimation, and face attributes.\n",
|
||||
"\n",
|
||||
"🔗 **GitHub**: [github.com/yakhyo/uniface](https://github.com/yakhyo/uniface) | 📚 **Docs**: [yakhyo.github.io/uniface](https://yakhyo.github.io/uniface)\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"This notebook demonstrates portrait matting using **MODNet** — a trimap-free model that produces soft alpha mattes from full images. No face detection or cropping required.\n",
|
||||
"\n",
|
||||
"## 1. Install UniFace"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -q \"uniface[cpu]\"\n",
|
||||
"\n",
|
||||
"# Clone repo for assets (Colab only)\n",
|
||||
"import os\n",
|
||||
"if 'COLAB_GPU' in os.environ or 'COLAB_RELEASE_TAG' in os.environ:\n",
|
||||
" if not os.path.exists('uniface'):\n",
|
||||
" !git clone --depth 1 https://github.com/yakhyo/uniface.git\n",
|
||||
" os.chdir('uniface/examples')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Import Libraries"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import cv2\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"import uniface\n",
|
||||
"from uniface.matting import MODNet\n",
|
||||
"\n",
|
||||
"print(f\"UniFace version: {uniface.__version__}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3. Initialize Model\n",
|
||||
"\n",
|
||||
"MODNet has two variants:\n",
|
||||
"- **PHOTOGRAPHIC** (default): optimized for high-quality portrait photos\n",
|
||||
"- **WEBCAM**: optimized for real-time webcam feeds"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"matting = MODNet()\n",
|
||||
"\n",
|
||||
"print(f\"Input size: {matting.input_size}\")\n",
|
||||
"print(f\"Input name: {matting.input_name}\")\n",
|
||||
"print(f\"Output names: {matting.output_names}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 4. Helper Functions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def compose(image, matte, background=None):\n",
|
||||
" \"\"\"Composite foreground over a background using the alpha matte.\"\"\"\n",
|
||||
" h, w = image.shape[:2]\n",
|
||||
" matte_3ch = matte[:, :, np.newaxis]\n",
|
||||
"\n",
|
||||
" if background is None:\n",
|
||||
" bg = np.full_like(image, (0, 177, 64), dtype=np.uint8)\n",
|
||||
" else:\n",
|
||||
" bg = cv2.resize(background, (w, h), interpolation=cv2.INTER_AREA)\n",
|
||||
"\n",
|
||||
" return (image * matte_3ch + bg * (1 - matte_3ch)).astype(np.uint8)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def show_results(image, matte):\n",
|
||||
" \"\"\"Display original, matte, and green screen as a single merged image.\"\"\"\n",
|
||||
" matte_vis = cv2.cvtColor((matte * 255).astype(np.uint8), cv2.COLOR_GRAY2BGR)\n",
|
||||
" green = compose(image, matte)\n",
|
||||
" merged = np.hstack([image, matte_vis, green])\n",
|
||||
"\n",
|
||||
" plt.figure(figsize=(18, 6))\n",
|
||||
" plt.imshow(cv2.cvtColor(merged, cv2.COLOR_BGR2RGB))\n",
|
||||
" plt.axis(\"off\")\n",
|
||||
" plt.tight_layout()\n",
|
||||
" plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 5. Basic Matting"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"image = cv2.imread(\"../assets/demos/src_portrait1.jpg\")\n",
|
||||
"print(f\"Image shape: {image.shape}\")\n",
|
||||
"\n",
|
||||
"matte = matting.predict(image)\n",
|
||||
"print(f\"Matte shape: {matte.shape}\")\n",
|
||||
"print(f\"Matte dtype: {matte.dtype}\")\n",
|
||||
"print(f\"Matte range: [{matte.min():.3f}, {matte.max():.3f}]\")\n",
|
||||
"\n",
|
||||
"show_results(image, matte)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 6. Transparent Background (RGBA)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"alpha = (matte * 255).astype(np.uint8)\n",
|
||||
"rgba = cv2.cvtColor(image, cv2.COLOR_BGR2BGRA)\n",
|
||||
"rgba[:, :, 3] = alpha\n",
|
||||
"\n",
|
||||
"# Checkerboard background to visualize transparency\n",
|
||||
"h, w = image.shape[:2]\n",
|
||||
"checker = np.zeros((h, w, 3), dtype=np.uint8)\n",
|
||||
"block = 20\n",
|
||||
"for y in range(0, h, block):\n",
|
||||
" for x in range(0, w, block):\n",
|
||||
" if (y // block + x // block) % 2 == 0:\n",
|
||||
" checker[y:y+block, x:x+block] = 200\n",
|
||||
" else:\n",
|
||||
" checker[y:y+block, x:x+block] = 255\n",
|
||||
"\n",
|
||||
"matte_3ch = matte[:, :, np.newaxis]\n",
|
||||
"rgba_vis = (image * matte_3ch + checker * (1 - matte_3ch)).astype(np.uint8)\n",
|
||||
"\n",
|
||||
"merged = np.hstack([image, rgba_vis])\n",
|
||||
"\n",
|
||||
"plt.figure(figsize=(16, 5))\n",
|
||||
"plt.imshow(cv2.cvtColor(merged, cv2.COLOR_BGR2RGB))\n",
|
||||
"plt.axis(\"off\")\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()\n",
|
||||
"\n",
|
||||
"print(f\"RGBA shape: {rgba.shape}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 7. Custom Background"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Create a gradient background\n",
|
||||
"h, w = image.shape[:2]\n",
|
||||
"gradient = np.zeros((h, w, 3), dtype=np.uint8)\n",
|
||||
"for y in range(h):\n",
|
||||
" ratio = y / h\n",
|
||||
" gradient[y, :] = [int(180 * (1 - ratio)), int(100 + 80 * ratio), int(220 * ratio)]\n",
|
||||
"\n",
|
||||
"custom_bg = compose(image, matte, gradient)\n",
|
||||
"green_bg = compose(image, matte)\n",
|
||||
"\n",
|
||||
"merged = np.hstack([image, green_bg, custom_bg])\n",
|
||||
"\n",
|
||||
"plt.figure(figsize=(18, 6))\n",
|
||||
"plt.imshow(cv2.cvtColor(merged, cv2.COLOR_BGR2RGB))\n",
|
||||
"plt.axis(\"off\")\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Summary\n",
|
||||
"\n",
|
||||
"MODNet provides trimap-free portrait matting:\n",
|
||||
"\n",
|
||||
"- **`predict(image)`** — returns `(H, W)` float32 alpha matte in `[0, 1]`\n",
|
||||
"- **No face detection needed** — works on full images directly\n",
|
||||
"- **Two variants** — `PHOTOGRAPHIC` for photos, `WEBCAM` for real-time\n",
|
||||
"- **Compositing** — use the matte for transparent PNGs, green screen, or custom backgrounds\n",
|
||||
"\n",
|
||||
"For more details, see the [Matting docs](https://yakhyo.github.io/uniface/modules/matting/)."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "base",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
site_name: UniFace
|
||||
site_description: All-in-One Face Analysis Library with ONNX Runtime
|
||||
site_description: A Unified Face Analysis Library for Python
|
||||
site_author: Yakhyokhuja Valikhujaev
|
||||
site_url: https://yakhyo.github.io/uniface
|
||||
|
||||
@@ -48,6 +48,7 @@ theme:
|
||||
- content.action.edit
|
||||
- content.action.view
|
||||
- content.tabs.link
|
||||
- announce.dismiss
|
||||
- toc.follow
|
||||
|
||||
icon:
|
||||
@@ -134,6 +135,7 @@ nav:
|
||||
- Quickstart: quickstart.md
|
||||
- Notebooks: notebooks.md
|
||||
- Model Zoo: models.md
|
||||
- Datasets: datasets.md
|
||||
- Tutorials:
|
||||
- Image Pipeline: recipes/image-pipeline.md
|
||||
- Video & Webcam: recipes/video-webcam.md
|
||||
@@ -144,12 +146,16 @@ nav:
|
||||
- API Reference:
|
||||
- Detection: modules/detection.md
|
||||
- Recognition: modules/recognition.md
|
||||
- Tracking: modules/tracking.md
|
||||
- Landmarks: modules/landmarks.md
|
||||
- Attributes: modules/attributes.md
|
||||
- Parsing: modules/parsing.md
|
||||
- Matting: modules/matting.md
|
||||
- Gaze: modules/gaze.md
|
||||
- Head Pose: modules/headpose.md
|
||||
- Anti-Spoofing: modules/spoofing.md
|
||||
- Privacy: modules/privacy.md
|
||||
- Stores: modules/stores.md
|
||||
- Guides:
|
||||
- Overview: concepts/overview.md
|
||||
- Inputs & Outputs: concepts/inputs-outputs.md
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[project]
|
||||
name = "uniface"
|
||||
version = "2.2.1"
|
||||
description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Face Parsing, Gaze Estimation, Age, and Gender Detection"
|
||||
version = "3.6.0"
|
||||
description = "UniFace: A Unified Face Analysis Library for Python"
|
||||
readme = "README.md"
|
||||
license = "MIT"
|
||||
authors = [{ name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" }]
|
||||
@@ -9,10 +9,11 @@ maintainers = [
|
||||
{ name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" },
|
||||
]
|
||||
|
||||
requires-python = ">=3.10,<3.14"
|
||||
requires-python = ">=3.10,<3.15"
|
||||
keywords = [
|
||||
"face-detection",
|
||||
"face-recognition",
|
||||
"face-tracking",
|
||||
"facial-landmarks",
|
||||
"face-parsing",
|
||||
"face-segmentation",
|
||||
@@ -28,7 +29,7 @@ keywords = [
|
||||
]
|
||||
|
||||
classifiers = [
|
||||
"Development Status :: 4 - Beta",
|
||||
"Development Status :: 5 - Production/Stable",
|
||||
"Intended Audience :: Developers",
|
||||
"Intended Audience :: Science/Research",
|
||||
"Operating System :: OS Independent",
|
||||
@@ -37,28 +38,41 @@ classifiers = [
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Programming Language :: Python :: 3.13",
|
||||
"Programming Language :: Python :: 3.14",
|
||||
]
|
||||
|
||||
dependencies = [
|
||||
"numpy>=1.21.0",
|
||||
"opencv-python>=4.5.0",
|
||||
"onnx>=1.12.0",
|
||||
"onnxruntime>=1.16.0",
|
||||
"scikit-image>=0.19.0",
|
||||
"scikit-image>=0.22.0",
|
||||
"scipy>=1.7.0",
|
||||
"requests>=2.28.0",
|
||||
"tqdm>=4.64.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = ["pytest>=7.0.0", "ruff>=0.4.0"]
|
||||
gpu = ["onnxruntime-gpu>=1.16.0"]
|
||||
cpu = [
|
||||
"onnxruntime>=1.16.0; python_version >= '3.11'",
|
||||
"onnxruntime>=1.16.0,<1.24; python_version < '3.11'",
|
||||
]
|
||||
gpu = [
|
||||
"onnxruntime-gpu>=1.16.0; python_version >= '3.11'",
|
||||
"onnxruntime-gpu>=1.16.0,<1.24; python_version < '3.11'",
|
||||
]
|
||||
dev = ["pytest>=7.0.0", "ruff>=0.4.0", "pre-commit>=3.0.0"]
|
||||
docs = [
|
||||
"mkdocs-material",
|
||||
"pymdown-extensions",
|
||||
"mkdocs-git-committers-plugin-2",
|
||||
"mkdocs-git-revision-date-localized-plugin",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://github.com/yakhyo/uniface"
|
||||
Repository = "https://github.com/yakhyo/uniface"
|
||||
Documentation = "https://github.com/yakhyo/uniface/blob/main/README.md"
|
||||
"Quick Start" = "https://github.com/yakhyo/uniface/blob/main/QUICKSTART.md"
|
||||
"Model Zoo" = "https://github.com/yakhyo/uniface/blob/main/MODELS.md"
|
||||
Documentation = "https://yakhyo.github.io/uniface"
|
||||
"Quick Start" = "https://yakhyo.github.io/uniface/quickstart/"
|
||||
"Model Zoo" = "https://yakhyo.github.io/uniface/models/"
|
||||
|
||||
[build-system]
|
||||
requires = ["setuptools>=64", "wheel"]
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
numpy>=1.21.0
|
||||
opencv-python>=4.5.0
|
||||
onnx>=1.12.0
|
||||
onnxruntime>=1.16.0
|
||||
scikit-image>=0.19.0
|
||||
requests>=2.28.0
|
||||
pytest>=7.0.0
|
||||
tqdm>=4.64.0
|
||||
@@ -2,7 +2,6 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for AgeGender attribute predictor."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -10,6 +9,14 @@ import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface.attribute import AgeGender, AttributeResult
|
||||
from uniface.types import Face
|
||||
|
||||
|
||||
def _make_face(bbox: list[int] | np.ndarray) -> Face:
|
||||
"""Helper: build a minimal Face from a bounding box."""
|
||||
bbox = np.asarray(bbox)
|
||||
landmarks = np.zeros((5, 2), dtype=np.float32)
|
||||
return Face(bbox=bbox, confidence=0.99, landmarks=landmarks)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -23,30 +30,30 @@ def mock_image():
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_bbox():
|
||||
return [100, 100, 300, 300]
|
||||
def mock_face():
|
||||
return _make_face([100, 100, 300, 300])
|
||||
|
||||
|
||||
def test_model_initialization(age_gender_model):
|
||||
assert age_gender_model is not None, 'AgeGender model initialization failed.'
|
||||
|
||||
|
||||
def test_prediction_output_format(age_gender_model, mock_image, mock_bbox):
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
def test_prediction_output_format(age_gender_model, mock_image, mock_face):
|
||||
result = age_gender_model.predict(mock_image, mock_face)
|
||||
assert isinstance(result, AttributeResult), f'Result should be AttributeResult, got {type(result)}'
|
||||
assert isinstance(result.gender, int), f'Gender should be int, got {type(result.gender)}'
|
||||
assert isinstance(result.age, int), f'Age should be int, got {type(result.age)}'
|
||||
assert isinstance(result.sex, str), f'Sex should be str, got {type(result.sex)}'
|
||||
|
||||
|
||||
def test_gender_values(age_gender_model, mock_image, mock_bbox):
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
def test_gender_values(age_gender_model, mock_image, mock_face):
|
||||
result = age_gender_model.predict(mock_image, mock_face)
|
||||
assert result.gender in [0, 1], f'Gender should be 0 (Female) or 1 (Male), got {result.gender}'
|
||||
assert result.sex in ['Female', 'Male'], f'Sex should be Female or Male, got {result.sex}'
|
||||
|
||||
|
||||
def test_age_range(age_gender_model, mock_image, mock_bbox):
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
def test_age_range(age_gender_model, mock_image, mock_face):
|
||||
result = age_gender_model.predict(mock_image, mock_face)
|
||||
assert 0 <= result.age <= 120, f'Age should be between 0 and 120, got {result.age}'
|
||||
|
||||
|
||||
@@ -58,39 +65,52 @@ def test_different_bbox_sizes(age_gender_model, mock_image):
|
||||
]
|
||||
|
||||
for bbox in test_bboxes:
|
||||
result = age_gender_model.predict(mock_image, bbox)
|
||||
face = _make_face(bbox)
|
||||
result = age_gender_model.predict(mock_image, face)
|
||||
assert result.gender in [0, 1], f'Failed for bbox {bbox}'
|
||||
assert 0 <= result.age <= 120, f'Age out of range for bbox {bbox}'
|
||||
|
||||
|
||||
def test_different_image_sizes(age_gender_model, mock_bbox):
|
||||
def test_different_image_sizes(age_gender_model):
|
||||
test_sizes = [(480, 640, 3), (720, 1280, 3), (1080, 1920, 3)]
|
||||
face = _make_face([100, 100, 300, 300])
|
||||
|
||||
for size in test_sizes:
|
||||
mock_image = np.random.randint(0, 255, size, dtype=np.uint8)
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
result = age_gender_model.predict(mock_image, face)
|
||||
assert result.gender in [0, 1], f'Failed for image size {size}'
|
||||
assert 0 <= result.age <= 120, f'Age out of range for image size {size}'
|
||||
|
||||
|
||||
def test_consistency(age_gender_model, mock_image, mock_bbox):
|
||||
result1 = age_gender_model.predict(mock_image, mock_bbox)
|
||||
result2 = age_gender_model.predict(mock_image, mock_bbox)
|
||||
def test_consistency(age_gender_model, mock_image, mock_face):
|
||||
result1 = age_gender_model.predict(mock_image, mock_face)
|
||||
result2 = age_gender_model.predict(mock_image, mock_face)
|
||||
|
||||
assert result1.gender == result2.gender, 'Same input should produce same gender prediction'
|
||||
assert result1.age == result2.age, 'Same input should produce same age prediction'
|
||||
|
||||
|
||||
def test_face_enrichment(age_gender_model, mock_image, mock_face):
|
||||
"""predict() must write gender & age back to the Face object."""
|
||||
assert mock_face.gender is None
|
||||
assert mock_face.age is None
|
||||
|
||||
result = age_gender_model.predict(mock_image, mock_face)
|
||||
|
||||
assert mock_face.gender == result.gender
|
||||
assert mock_face.age == result.age
|
||||
|
||||
|
||||
def test_bbox_list_format(age_gender_model, mock_image):
|
||||
bbox_list = [100, 100, 300, 300]
|
||||
result = age_gender_model.predict(mock_image, bbox_list)
|
||||
face = _make_face([100, 100, 300, 300])
|
||||
result = age_gender_model.predict(mock_image, face)
|
||||
assert result.gender in [0, 1], 'Should work with bbox as list'
|
||||
assert 0 <= result.age <= 120, 'Age should be in valid range'
|
||||
|
||||
|
||||
def test_bbox_array_format(age_gender_model, mock_image):
|
||||
bbox_array = np.array([100, 100, 300, 300])
|
||||
result = age_gender_model.predict(mock_image, bbox_array)
|
||||
face = _make_face(np.array([100, 100, 300, 300]))
|
||||
result = age_gender_model.predict(mock_image, face)
|
||||
assert result.gender in [0, 1], 'Should work with bbox as numpy array'
|
||||
assert 0 <= result.age <= 120, 'Age should be in valid range'
|
||||
|
||||
@@ -104,7 +124,8 @@ def test_multiple_predictions(age_gender_model, mock_image):
|
||||
|
||||
results = []
|
||||
for bbox in bboxes:
|
||||
result = age_gender_model.predict(mock_image, bbox)
|
||||
face = _make_face(bbox)
|
||||
result = age_gender_model.predict(mock_image, face)
|
||||
results.append(result)
|
||||
|
||||
assert len(results) == 3, 'Should have 3 predictions'
|
||||
@@ -113,28 +134,26 @@ def test_multiple_predictions(age_gender_model, mock_image):
|
||||
assert 0 <= result.age <= 120
|
||||
|
||||
|
||||
def test_age_is_positive(age_gender_model, mock_image, mock_bbox):
|
||||
def test_age_is_positive(age_gender_model, mock_image, mock_face):
|
||||
for _ in range(5):
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
result = age_gender_model.predict(mock_image, mock_face)
|
||||
assert result.age >= 0, f'Age should be non-negative, got {result.age}'
|
||||
|
||||
|
||||
def test_output_format_for_visualization(age_gender_model, mock_image, mock_bbox):
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
def test_output_format_for_visualization(age_gender_model, mock_image, mock_face):
|
||||
result = age_gender_model.predict(mock_image, mock_face)
|
||||
text = f'{result.sex}, {result.age}y'
|
||||
assert isinstance(text, str), 'Should be able to format as string'
|
||||
assert 'Male' in text or 'Female' in text, 'Text should contain gender'
|
||||
assert 'y' in text, "Text should contain 'y' for years"
|
||||
|
||||
|
||||
def test_attribute_result_fields(age_gender_model, mock_image, mock_bbox):
|
||||
def test_attribute_result_fields(age_gender_model, mock_image, mock_face):
|
||||
"""Test that AttributeResult has correct fields for AgeGender model."""
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
result = age_gender_model.predict(mock_image, mock_face)
|
||||
|
||||
# AgeGender should set gender and age
|
||||
assert result.gender is not None
|
||||
assert result.age is not None
|
||||
|
||||
# AgeGender should NOT set race and age_group (FairFace only)
|
||||
assert result.race is None
|
||||
assert result.age_group is None
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for factory functions (create_detector, create_recognizer, etc.)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -10,13 +9,15 @@ import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface import (
|
||||
create_attribute_predictor,
|
||||
create_detector,
|
||||
create_landmarker,
|
||||
create_recognizer,
|
||||
detect_faces,
|
||||
list_available_detectors,
|
||||
)
|
||||
from uniface.constants import RetinaFaceWeights, SCRFDWeights
|
||||
from uniface.attribute import AgeGender, FairFace
|
||||
from uniface.constants import AgeGenderWeights, FairFaceWeights, RetinaFaceWeights, SCRFDWeights
|
||||
from uniface.spoofing import MiniFASNet, create_spoofer
|
||||
|
||||
|
||||
# create_detector tests
|
||||
@@ -90,6 +91,12 @@ def test_create_recognizer_sphereface():
|
||||
assert recognizer is not None, 'Failed to create SphereFace recognizer'
|
||||
|
||||
|
||||
def test_create_recognizer_edgeface():
|
||||
"""Test creating an EdgeFace recognizer using factory function."""
|
||||
recognizer = create_recognizer('edgeface')
|
||||
assert recognizer is not None, 'Failed to create EdgeFace recognizer'
|
||||
|
||||
|
||||
def test_create_recognizer_invalid_method():
|
||||
"""
|
||||
Test that invalid recognizer method raises an error.
|
||||
@@ -123,60 +130,23 @@ def test_create_landmarker_invalid_method():
|
||||
create_landmarker('invalid_method')
|
||||
|
||||
|
||||
# detect_faces tests
|
||||
def test_detect_faces_retinaface():
|
||||
"""
|
||||
Test high-level detect_faces function with RetinaFace.
|
||||
"""
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(mock_image, method='retinaface')
|
||||
def test_create_landmarker_pipnet_default():
|
||||
"""create_landmarker('pipnet') returns a PIPNet (98 points by default)."""
|
||||
from uniface.landmark import PIPNet
|
||||
|
||||
assert isinstance(faces, list), 'detect_faces should return a list'
|
||||
landmarker = create_landmarker('pipnet')
|
||||
assert isinstance(landmarker, PIPNet), 'Should return PIPNet instance'
|
||||
assert landmarker.num_lms == 98
|
||||
|
||||
|
||||
def test_detect_faces_scrfd():
|
||||
"""
|
||||
Test high-level detect_faces function with SCRFD.
|
||||
"""
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(mock_image, method='scrfd')
|
||||
def test_create_landmarker_pipnet_68():
|
||||
"""create_landmarker('pipnet', model_name=...) selects the 68-point variant."""
|
||||
from uniface.constants import PIPNetWeights
|
||||
from uniface.landmark import PIPNet
|
||||
|
||||
assert isinstance(faces, list), 'detect_faces should return a list'
|
||||
|
||||
|
||||
def test_detect_faces_with_threshold():
|
||||
"""
|
||||
Test detect_faces with custom confidence threshold.
|
||||
"""
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(mock_image, method='retinaface', confidence_threshold=0.8)
|
||||
|
||||
assert isinstance(faces, list), 'detect_faces should return a list'
|
||||
|
||||
# All detections should respect threshold
|
||||
for face in faces:
|
||||
assert face.confidence >= 0.8, 'All detections should meet confidence threshold'
|
||||
|
||||
|
||||
def test_detect_faces_default_method():
|
||||
"""
|
||||
Test detect_faces with default method (should use retinaface).
|
||||
"""
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(mock_image) # No method specified
|
||||
|
||||
assert isinstance(faces, list), 'detect_faces should return a list with default method'
|
||||
|
||||
|
||||
def test_detect_faces_empty_image():
|
||||
"""
|
||||
Test detect_faces on a blank image.
|
||||
"""
|
||||
empty_image = np.zeros((640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(empty_image, method='retinaface')
|
||||
|
||||
assert isinstance(faces, list), 'Should return a list even for empty image'
|
||||
assert len(faces) == 0, 'Should detect no faces in blank image'
|
||||
landmarker = create_landmarker('pipnet', model_name=PIPNetWeights.DW300_CELEBA_68)
|
||||
assert isinstance(landmarker, PIPNet), 'Should return PIPNet instance'
|
||||
assert landmarker.num_lms == 68
|
||||
|
||||
|
||||
# list_available_detectors tests
|
||||
@@ -222,7 +192,7 @@ def test_recognizer_inference_from_factory():
|
||||
|
||||
embedding = recognizer.get_embedding(mock_image)
|
||||
assert embedding is not None, 'Recognizer should return embedding'
|
||||
assert embedding.shape[1] == 512, 'Should return 512-dimensional embedding'
|
||||
assert embedding.shape == (1, 512), 'get_embedding should return (1, 512) with batch dimension'
|
||||
|
||||
|
||||
def test_landmarker_inference_from_factory():
|
||||
@@ -238,6 +208,17 @@ def test_landmarker_inference_from_factory():
|
||||
assert landmarks.shape == (106, 2), 'Should return 106 landmarks'
|
||||
|
||||
|
||||
def test_pipnet_landmarker_inference_from_factory():
|
||||
"""PIPNet landmarker created from factory can perform inference."""
|
||||
landmarker = create_landmarker('pipnet')
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
mock_bbox = [100, 100, 300, 300]
|
||||
|
||||
landmarks = landmarker.get_landmarks(mock_image, mock_bbox)
|
||||
assert landmarks is not None, 'Landmarker should return landmarks'
|
||||
assert landmarks.shape == (98, 2), 'Should return 98 landmarks'
|
||||
|
||||
|
||||
def test_multiple_detector_creation():
|
||||
"""
|
||||
Test that multiple detectors can be created independently.
|
||||
@@ -280,3 +261,32 @@ def test_factory_returns_correct_types():
|
||||
assert isinstance(detector, RetinaFace), 'Should return RetinaFace instance'
|
||||
assert isinstance(recognizer, ArcFace), 'Should return ArcFace instance'
|
||||
assert isinstance(landmarker, Landmark106), 'Should return Landmark106 instance'
|
||||
|
||||
|
||||
# create_spoofer tests
|
||||
def test_create_spoofer_default():
|
||||
"""Test creating a spoofer with default parameters."""
|
||||
spoofer = create_spoofer()
|
||||
assert isinstance(spoofer, MiniFASNet), 'Should return MiniFASNet instance'
|
||||
|
||||
|
||||
def test_create_spoofer_with_providers():
|
||||
"""Test that create_spoofer forwards providers kwarg without TypeError."""
|
||||
spoofer = create_spoofer(providers=['CPUExecutionProvider'])
|
||||
assert isinstance(spoofer, MiniFASNet), 'Should return MiniFASNet instance'
|
||||
|
||||
|
||||
# create_attribute_predictor tests
|
||||
def test_create_attribute_predictor_age_gender():
|
||||
predictor = create_attribute_predictor(AgeGenderWeights.DEFAULT)
|
||||
assert isinstance(predictor, AgeGender), 'Should return AgeGender instance'
|
||||
|
||||
|
||||
def test_create_attribute_predictor_fairface():
|
||||
predictor = create_attribute_predictor(FairFaceWeights.DEFAULT)
|
||||
assert isinstance(predictor, FairFace), 'Should return FairFace instance'
|
||||
|
||||
|
||||
def test_create_attribute_predictor_invalid():
|
||||
with pytest.raises(ValueError, match='Unsupported attribute model'):
|
||||
create_attribute_predictor('invalid_model')
|
||||
|
||||
115
tests/test_headpose.py
Normal file
@@ -0,0 +1,115 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface import HeadPose, HeadPoseResult, create_head_pose_estimator
|
||||
from uniface.headpose import BaseHeadPoseEstimator
|
||||
from uniface.headpose.models import HeadPose as HeadPoseModel
|
||||
|
||||
|
||||
def test_create_head_pose_estimator_default():
|
||||
"""Test creating a head pose estimator with default parameters."""
|
||||
estimator = create_head_pose_estimator()
|
||||
assert isinstance(estimator, HeadPose), 'Should return HeadPose instance'
|
||||
|
||||
|
||||
def test_create_head_pose_estimator_aliases():
|
||||
"""Test that factory accepts all documented aliases."""
|
||||
for alias in ('headpose', 'head_pose', '6drepnet'):
|
||||
estimator = create_head_pose_estimator(alias)
|
||||
assert isinstance(estimator, HeadPose), f"Alias '{alias}' should return HeadPose"
|
||||
|
||||
|
||||
def test_create_head_pose_estimator_invalid():
|
||||
"""Test that invalid method raises ValueError."""
|
||||
with pytest.raises(ValueError, match='Unsupported head pose estimation method'):
|
||||
create_head_pose_estimator('invalid_method')
|
||||
|
||||
|
||||
def test_head_pose_inference():
|
||||
"""Test that HeadPose can run inference on a mock image."""
|
||||
estimator = HeadPose()
|
||||
mock_image = np.random.randint(0, 255, (224, 224, 3), dtype=np.uint8)
|
||||
result = estimator.estimate(mock_image)
|
||||
|
||||
assert isinstance(result, HeadPoseResult), 'Should return HeadPoseResult'
|
||||
assert isinstance(result.pitch, float), 'pitch should be float'
|
||||
assert isinstance(result.yaw, float), 'yaw should be float'
|
||||
assert isinstance(result.roll, float), 'roll should be float'
|
||||
|
||||
|
||||
def test_head_pose_callable():
|
||||
"""Test that HeadPose is callable via __call__."""
|
||||
estimator = HeadPose()
|
||||
mock_image = np.random.randint(0, 255, (224, 224, 3), dtype=np.uint8)
|
||||
result = estimator(mock_image)
|
||||
|
||||
assert isinstance(result, HeadPoseResult), '__call__ should return HeadPoseResult'
|
||||
|
||||
|
||||
def test_head_pose_result_repr():
|
||||
"""Test HeadPoseResult repr formatting."""
|
||||
result = HeadPoseResult(pitch=10.5, yaw=-20.3, roll=5.1)
|
||||
repr_str = repr(result)
|
||||
assert 'HeadPoseResult' in repr_str
|
||||
assert '10.5' in repr_str
|
||||
assert '-20.3' in repr_str
|
||||
assert '5.1' in repr_str
|
||||
|
||||
|
||||
def test_head_pose_result_frozen():
|
||||
"""Test that HeadPoseResult is immutable."""
|
||||
result = HeadPoseResult(pitch=1.0, yaw=2.0, roll=3.0)
|
||||
with pytest.raises(AttributeError):
|
||||
result.pitch = 99.0 # type: ignore[misc]
|
||||
|
||||
|
||||
def test_rotation_matrix_to_euler_identity():
|
||||
"""Test that identity rotation matrix gives zero angles."""
|
||||
identity = np.eye(3).reshape(1, 3, 3)
|
||||
euler = HeadPoseModel.rotation_matrix_to_euler(identity)
|
||||
|
||||
assert euler.shape == (1, 3), 'Should return (1, 3) shaped array'
|
||||
np.testing.assert_allclose(euler[0], [0.0, 0.0, 0.0], atol=1e-5)
|
||||
|
||||
|
||||
def test_rotation_matrix_to_euler_90deg_yaw():
|
||||
"""Test 90-degree yaw rotation."""
|
||||
angle = np.radians(90)
|
||||
R = np.array(
|
||||
[
|
||||
[np.cos(angle), 0, np.sin(angle)],
|
||||
[0, 1, 0],
|
||||
[-np.sin(angle), 0, np.cos(angle)],
|
||||
]
|
||||
).reshape(1, 3, 3)
|
||||
euler = HeadPoseModel.rotation_matrix_to_euler(R)
|
||||
|
||||
np.testing.assert_allclose(euler[0, 1], 90.0, atol=1e-3)
|
||||
|
||||
|
||||
def test_rotation_matrix_to_euler_batch():
|
||||
"""Test batch processing of rotation matrices."""
|
||||
batch = np.stack([np.eye(3), np.eye(3), np.eye(3)], axis=0)
|
||||
euler = HeadPoseModel.rotation_matrix_to_euler(batch)
|
||||
|
||||
assert euler.shape == (3, 3), 'Batch of 3 should return (3, 3)'
|
||||
np.testing.assert_allclose(euler, 0.0, atol=1e-5)
|
||||
|
||||
|
||||
def test_factory_returns_correct_type():
|
||||
"""Test that factory function returns BaseHeadPoseEstimator subclass."""
|
||||
estimator = create_head_pose_estimator()
|
||||
assert isinstance(estimator, BaseHeadPoseEstimator), 'Should be BaseHeadPoseEstimator subclass'
|
||||
|
||||
|
||||
def test_head_pose_with_providers():
|
||||
"""Test that HeadPose accepts providers kwarg."""
|
||||
estimator = HeadPose(providers=['CPUExecutionProvider'])
|
||||
assert isinstance(estimator, HeadPose), 'Should create with explicit providers'
|
||||
@@ -2,7 +2,6 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for 106-point facial landmark detector."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
158
tests/test_matting.py
Normal file
@@ -0,0 +1,158 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface.constants import MODNetWeights
|
||||
from uniface.matting import MODNet, create_matting_model
|
||||
|
||||
|
||||
def test_modnet_initialization():
|
||||
"""Test MODNet initialization with default weights."""
|
||||
matting = MODNet()
|
||||
assert matting is not None
|
||||
assert matting.input_size == 512
|
||||
|
||||
|
||||
def test_modnet_with_webcam_weights():
|
||||
"""Test MODNet initialization with webcam variant."""
|
||||
matting = MODNet(model_name=MODNetWeights.WEBCAM)
|
||||
assert matting is not None
|
||||
assert matting.input_size == 512
|
||||
|
||||
|
||||
def test_modnet_custom_input_size():
|
||||
"""Test MODNet with custom input size."""
|
||||
matting = MODNet(input_size=256)
|
||||
assert matting.input_size == 256
|
||||
|
||||
|
||||
def test_modnet_preprocess():
|
||||
"""Test preprocessing produces correct tensor shape and dtype."""
|
||||
matting = MODNet()
|
||||
|
||||
image = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
|
||||
tensor, orig_h, orig_w = matting.preprocess(image)
|
||||
|
||||
assert tensor.dtype == np.float32
|
||||
assert tensor.ndim == 4
|
||||
assert tensor.shape[0] == 1
|
||||
assert tensor.shape[1] == 3
|
||||
assert tensor.shape[2] % 32 == 0
|
||||
assert tensor.shape[3] % 32 == 0
|
||||
assert orig_h == 480
|
||||
assert orig_w == 640
|
||||
|
||||
|
||||
def test_modnet_preprocess_small_image():
|
||||
"""Test preprocessing with image smaller than input_size."""
|
||||
matting = MODNet(input_size=512)
|
||||
|
||||
image = np.random.randint(0, 255, (128, 128, 3), dtype=np.uint8)
|
||||
tensor, orig_h, orig_w = matting.preprocess(image)
|
||||
|
||||
assert tensor.shape[2] % 32 == 0
|
||||
assert tensor.shape[3] % 32 == 0
|
||||
assert orig_h == 128
|
||||
assert orig_w == 128
|
||||
|
||||
|
||||
def test_modnet_preprocess_large_image():
|
||||
"""Test preprocessing with image larger than input_size."""
|
||||
matting = MODNet(input_size=512)
|
||||
|
||||
image = np.random.randint(0, 255, (1080, 1920, 3), dtype=np.uint8)
|
||||
tensor, orig_h, orig_w = matting.preprocess(image)
|
||||
|
||||
assert tensor.shape[2] % 32 == 0
|
||||
assert tensor.shape[3] % 32 == 0
|
||||
assert orig_h == 1080
|
||||
assert orig_w == 1920
|
||||
|
||||
|
||||
def test_modnet_postprocess():
|
||||
"""Test postprocessing resizes matte to original dimensions."""
|
||||
matting = MODNet()
|
||||
|
||||
dummy_output = np.random.rand(1, 1, 512, 672).astype(np.float32)
|
||||
matte = matting.postprocess(dummy_output, original_size=(640, 480))
|
||||
|
||||
assert matte.shape == (480, 640)
|
||||
assert matte.dtype == np.float32
|
||||
|
||||
|
||||
def test_modnet_predict():
|
||||
"""Test end-to-end prediction."""
|
||||
matting = MODNet()
|
||||
|
||||
image = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
|
||||
matte = matting.predict(image)
|
||||
|
||||
assert matte.shape == (480, 640)
|
||||
assert matte.dtype == np.float32
|
||||
assert matte.min() >= 0.0
|
||||
assert matte.max() <= 1.0
|
||||
|
||||
|
||||
def test_modnet_callable():
|
||||
"""Test that MODNet is callable via __call__."""
|
||||
matting = MODNet()
|
||||
image = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)
|
||||
|
||||
matte = matting(image)
|
||||
|
||||
assert matte.shape == (256, 256)
|
||||
assert matte.dtype == np.float32
|
||||
|
||||
|
||||
def test_modnet_different_input_sizes():
|
||||
"""Test prediction with various image dimensions."""
|
||||
matting = MODNet()
|
||||
|
||||
sizes = [(256, 256), (480, 640), (720, 1280), (300, 500)]
|
||||
|
||||
for h, w in sizes:
|
||||
image = np.random.randint(0, 255, (h, w, 3), dtype=np.uint8)
|
||||
matte = matting.predict(image)
|
||||
|
||||
assert matte.shape == (h, w), f'Failed for size {h}x{w}'
|
||||
assert matte.dtype == np.float32
|
||||
|
||||
|
||||
# Factory tests
|
||||
|
||||
|
||||
def test_create_matting_model_default():
|
||||
"""Test factory with default parameters."""
|
||||
matting = create_matting_model()
|
||||
assert matting is not None
|
||||
assert isinstance(matting, MODNet)
|
||||
|
||||
|
||||
def test_create_matting_model_with_enum():
|
||||
"""Test factory with enum."""
|
||||
matting = create_matting_model(MODNetWeights.WEBCAM)
|
||||
assert isinstance(matting, MODNet)
|
||||
|
||||
|
||||
def test_create_matting_model_with_string():
|
||||
"""Test factory with string model name."""
|
||||
matting = create_matting_model('modnet_photographic')
|
||||
assert isinstance(matting, MODNet)
|
||||
|
||||
|
||||
def test_create_matting_model_webcam_string():
|
||||
"""Test factory with webcam string model name."""
|
||||
matting = create_matting_model('modnet_webcam')
|
||||
assert isinstance(matting, MODNet)
|
||||
|
||||
|
||||
def test_create_matting_model_invalid():
|
||||
"""Test factory with invalid model name."""
|
||||
with pytest.raises(ValueError, match='Unknown matting model'):
|
||||
create_matting_model('invalid_model')
|
||||
@@ -2,15 +2,14 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for BiSeNet face parsing model."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface.constants import ParsingWeights
|
||||
from uniface.parsing import BiSeNet, create_face_parser
|
||||
from uniface.constants import ParsingWeights, XSegWeights
|
||||
from uniface.parsing import BiSeNet, XSeg, create_face_parser
|
||||
|
||||
|
||||
def test_bisenet_initialization():
|
||||
@@ -120,3 +119,151 @@ def test_bisenet_different_input_sizes():
|
||||
|
||||
assert mask.shape == (h, w), f'Failed for size {h}x{w}'
|
||||
assert mask.dtype == np.uint8
|
||||
|
||||
|
||||
# XSeg Tests
|
||||
|
||||
|
||||
def test_xseg_initialization():
|
||||
"""Test XSeg initialization."""
|
||||
parser = XSeg()
|
||||
assert parser is not None
|
||||
assert parser.input_size == (256, 256)
|
||||
assert parser.align_size == 256
|
||||
assert parser.blur_sigma == 0
|
||||
|
||||
|
||||
def test_xseg_with_custom_params():
|
||||
"""Test XSeg with custom parameters."""
|
||||
parser = XSeg(align_size=512, blur_sigma=5)
|
||||
assert parser.align_size == 512
|
||||
assert parser.blur_sigma == 5
|
||||
|
||||
|
||||
def test_xseg_preprocess():
|
||||
"""Test XSeg preprocessing."""
|
||||
parser = XSeg()
|
||||
|
||||
# Create a dummy aligned face crop
|
||||
face_crop = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)
|
||||
|
||||
# Preprocess
|
||||
preprocessed = parser.preprocess(face_crop)
|
||||
|
||||
assert preprocessed.shape == (1, 256, 256, 3) # NHWC format
|
||||
assert preprocessed.dtype == np.float32
|
||||
assert preprocessed.min() >= 0
|
||||
assert preprocessed.max() <= 1
|
||||
|
||||
|
||||
def test_xseg_postprocess():
|
||||
"""Test XSeg postprocessing."""
|
||||
parser = XSeg()
|
||||
|
||||
# Create dummy model output (NHWC format)
|
||||
dummy_output = np.random.rand(1, 256, 256, 1).astype(np.float32)
|
||||
|
||||
# Postprocess
|
||||
mask = parser.postprocess(dummy_output, crop_size=(256, 256))
|
||||
|
||||
assert mask.shape == (256, 256)
|
||||
assert mask.dtype == np.float32
|
||||
assert mask.min() >= 0
|
||||
assert mask.max() <= 1
|
||||
|
||||
|
||||
def test_xseg_parse_aligned():
|
||||
"""Test XSeg parse_aligned method."""
|
||||
parser = XSeg()
|
||||
|
||||
# Create a dummy aligned face crop
|
||||
face_crop = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)
|
||||
|
||||
# Parse
|
||||
mask = parser.parse_aligned(face_crop)
|
||||
|
||||
assert mask.shape == (256, 256)
|
||||
assert mask.dtype == np.float32
|
||||
assert mask.min() >= 0
|
||||
assert mask.max() <= 1
|
||||
|
||||
|
||||
def test_xseg_parse_with_landmarks():
|
||||
"""Test XSeg parse method with landmarks."""
|
||||
parser = XSeg()
|
||||
|
||||
# Create a dummy image
|
||||
image = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
|
||||
|
||||
# Create dummy 5-point landmarks
|
||||
landmarks = np.array(
|
||||
[
|
||||
[250, 200], # left eye
|
||||
[390, 200], # right eye
|
||||
[320, 280], # nose
|
||||
[260, 350], # left mouth
|
||||
[380, 350], # right mouth
|
||||
],
|
||||
dtype=np.float32,
|
||||
)
|
||||
|
||||
# Parse
|
||||
mask = parser.parse(image, landmarks=landmarks)
|
||||
|
||||
assert mask.shape == (480, 640)
|
||||
assert mask.dtype == np.float32
|
||||
assert mask.min() >= 0
|
||||
assert mask.max() <= 1
|
||||
|
||||
|
||||
def test_xseg_parse_invalid_landmarks():
|
||||
"""Test XSeg parse with invalid landmarks shape."""
|
||||
parser = XSeg()
|
||||
image = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)
|
||||
|
||||
# Wrong shape
|
||||
invalid_landmarks = np.array([[0, 0], [1, 1], [2, 2]])
|
||||
|
||||
with pytest.raises(ValueError, match='Landmarks must have shape'):
|
||||
parser.parse(image, landmarks=invalid_landmarks)
|
||||
|
||||
|
||||
def test_xseg_parse_with_inverse():
|
||||
"""Test XSeg parse_with_inverse method."""
|
||||
parser = XSeg()
|
||||
|
||||
# Create a dummy image
|
||||
image = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
|
||||
|
||||
# Create dummy 5-point landmarks
|
||||
landmarks = np.array(
|
||||
[
|
||||
[250, 200],
|
||||
[390, 200],
|
||||
[320, 280],
|
||||
[260, 350],
|
||||
[380, 350],
|
||||
],
|
||||
dtype=np.float32,
|
||||
)
|
||||
|
||||
# Parse with inverse
|
||||
mask, face_crop, inverse_matrix = parser.parse_with_inverse(image, landmarks)
|
||||
|
||||
assert mask.shape == (256, 256)
|
||||
assert face_crop.shape == (256, 256, 3)
|
||||
assert inverse_matrix.shape == (2, 3)
|
||||
|
||||
|
||||
def test_create_face_parser_xseg_enum():
|
||||
"""Test factory function with XSeg enum."""
|
||||
parser = create_face_parser(XSegWeights.DEFAULT)
|
||||
assert parser is not None
|
||||
assert isinstance(parser, XSeg)
|
||||
|
||||
|
||||
def test_create_face_parser_xseg_string():
|
||||
"""Test factory function with XSeg string."""
|
||||
parser = create_face_parser('xseg')
|
||||
assert parser is not None
|
||||
assert isinstance(parser, XSeg)
|
||||
|
||||
132
tests/test_pipnet_landmark.py
Normal file
@@ -0,0 +1,132 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface.constants import PIPNetWeights
|
||||
from uniface.landmark import PIPNet
|
||||
|
||||
|
||||
@pytest.fixture(scope='module', params=[PIPNetWeights.WFLW_98, PIPNetWeights.DW300_CELEBA_68])
|
||||
def pipnet_model(request):
|
||||
return PIPNet(model_name=request.param)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_image():
|
||||
return np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_bbox():
|
||||
return [100, 100, 300, 300]
|
||||
|
||||
|
||||
def _expected_n_lms(model: PIPNet) -> int:
|
||||
return 98 if model.num_lms == 98 else 68
|
||||
|
||||
|
||||
def test_model_initialization(pipnet_model):
|
||||
assert pipnet_model is not None, 'PIPNet model initialization failed.'
|
||||
assert pipnet_model.num_lms in (68, 98), f'Unexpected num_lms: {pipnet_model.num_lms}'
|
||||
assert pipnet_model.input_h == pipnet_model.input_w == 256
|
||||
|
||||
|
||||
def test_landmark_detection(pipnet_model, mock_image, mock_bbox):
|
||||
landmarks = pipnet_model.get_landmarks(mock_image, mock_bbox)
|
||||
n = _expected_n_lms(pipnet_model)
|
||||
assert landmarks.shape == (n, 2), f'Expected shape ({n}, 2), got {landmarks.shape}'
|
||||
|
||||
|
||||
def test_landmark_dtype(pipnet_model, mock_image, mock_bbox):
|
||||
landmarks = pipnet_model.get_landmarks(mock_image, mock_bbox)
|
||||
assert landmarks.dtype == np.float32, f'Expected float32, got {landmarks.dtype}'
|
||||
|
||||
|
||||
def test_landmark_coordinates_within_image(pipnet_model, mock_image, mock_bbox):
|
||||
landmarks = pipnet_model.get_landmarks(mock_image, mock_bbox)
|
||||
n = _expected_n_lms(pipnet_model)
|
||||
|
||||
x_coords = landmarks[:, 0]
|
||||
y_coords = landmarks[:, 1]
|
||||
|
||||
x1, y1, x2, y2 = mock_bbox
|
||||
margin = 50
|
||||
|
||||
x_in_bounds = int(np.sum((x_coords >= x1 - margin) & (x_coords <= x2 + margin)))
|
||||
y_in_bounds = int(np.sum((y_coords >= y1 - margin) & (y_coords <= y2 + margin)))
|
||||
|
||||
threshold = max(int(0.9 * n), n - 5)
|
||||
assert x_in_bounds >= threshold, f'Only {x_in_bounds}/{n} x-coordinates within bounds'
|
||||
assert y_in_bounds >= threshold, f'Only {y_in_bounds}/{n} y-coordinates within bounds'
|
||||
|
||||
|
||||
def test_different_bbox_sizes(pipnet_model, mock_image):
|
||||
n = _expected_n_lms(pipnet_model)
|
||||
test_bboxes = [
|
||||
[50, 50, 150, 150],
|
||||
[100, 100, 300, 300],
|
||||
[50, 50, 400, 400],
|
||||
]
|
||||
|
||||
for bbox in test_bboxes:
|
||||
landmarks = pipnet_model.get_landmarks(mock_image, bbox)
|
||||
assert landmarks.shape == (n, 2), f'Failed for bbox {bbox}'
|
||||
|
||||
|
||||
def test_consistency(pipnet_model, mock_image, mock_bbox):
|
||||
landmarks1 = pipnet_model.get_landmarks(mock_image, mock_bbox)
|
||||
landmarks2 = pipnet_model.get_landmarks(mock_image, mock_bbox)
|
||||
assert np.allclose(landmarks1, landmarks2), 'Same input should produce same landmarks'
|
||||
|
||||
|
||||
def test_different_image_sizes(pipnet_model, mock_bbox):
|
||||
n = _expected_n_lms(pipnet_model)
|
||||
test_sizes = [(480, 640, 3), (720, 1280, 3), (1080, 1920, 3)]
|
||||
|
||||
for size in test_sizes:
|
||||
mock_image = np.random.randint(0, 255, size, dtype=np.uint8)
|
||||
landmarks = pipnet_model.get_landmarks(mock_image, mock_bbox)
|
||||
assert landmarks.shape == (n, 2), f'Failed for image size {size}'
|
||||
|
||||
|
||||
def test_bbox_list_format(pipnet_model, mock_image):
|
||||
n = _expected_n_lms(pipnet_model)
|
||||
landmarks = pipnet_model.get_landmarks(mock_image, [100, 100, 300, 300])
|
||||
assert landmarks.shape == (n, 2), 'Should work with bbox as list'
|
||||
|
||||
|
||||
def test_bbox_array_format(pipnet_model, mock_image):
|
||||
n = _expected_n_lms(pipnet_model)
|
||||
bbox_array = np.array([100, 100, 300, 300])
|
||||
landmarks = pipnet_model.get_landmarks(mock_image, bbox_array)
|
||||
assert landmarks.shape == (n, 2), 'Should work with bbox as numpy array'
|
||||
|
||||
|
||||
def test_landmark_distribution(pipnet_model, mock_image, mock_bbox):
|
||||
landmarks = pipnet_model.get_landmarks(mock_image, mock_bbox)
|
||||
|
||||
x_variance = np.var(landmarks[:, 0])
|
||||
y_variance = np.var(landmarks[:, 1])
|
||||
|
||||
assert x_variance > 0, 'Landmarks should have variation in x-coordinates'
|
||||
assert y_variance > 0, 'Landmarks should have variation in y-coordinates'
|
||||
|
||||
|
||||
def test_default_model_is_wflw_98():
|
||||
"""PIPNet() with no args should default to the 98-point WFLW model."""
|
||||
model = PIPNet()
|
||||
assert model.num_lms == 98
|
||||
|
||||
|
||||
def test_meanface_lookup_invalid_num_lms():
|
||||
"""get_meanface_info should reject unsupported landmark counts."""
|
||||
from uniface.landmark._meanface import get_meanface_info
|
||||
|
||||
with pytest.raises(ValueError, match='No meanface table'):
|
||||
get_meanface_info(num_lms=42)
|
||||
@@ -2,14 +2,13 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for face recognition models (ArcFace, MobileFace, SphereFace)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface.recognition import ArcFace, MobileFace, SphereFace
|
||||
from uniface.recognition import ArcFace, EdgeFace, MobileFace, SphereFace
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -36,6 +35,12 @@ def sphereface_model():
|
||||
return SphereFace()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def edgeface_model():
|
||||
"""Fixture to initialize the EdgeFace model for testing."""
|
||||
return EdgeFace()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_aligned_face():
|
||||
"""
|
||||
@@ -75,7 +80,7 @@ def test_arcface_embedding_shape(arcface_model, mock_aligned_face):
|
||||
"""
|
||||
embedding = arcface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
# ArcFace typically produces 512-dimensional embeddings
|
||||
# ArcFace get_embedding returns raw ONNX output with batch dimension
|
||||
assert embedding.shape[1] == 512, f'Expected 512-dim embedding, got {embedding.shape[1]}'
|
||||
assert embedding.shape[0] == 1, 'Embedding should have batch dimension of 1'
|
||||
|
||||
@@ -89,7 +94,8 @@ def test_arcface_normalized_embedding(arcface_model, mock_landmarks):
|
||||
|
||||
embedding = arcface_model.get_normalized_embedding(mock_image, mock_landmarks)
|
||||
|
||||
# Check that embedding is normalized (L2 norm ≈ 1.0)
|
||||
# Check shape and normalization
|
||||
assert embedding.shape == (512,), f'Expected shape (512,), got {embedding.shape}'
|
||||
norm = np.linalg.norm(embedding)
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f'Normalized embedding should have norm 1.0, got {norm}'
|
||||
|
||||
@@ -126,7 +132,7 @@ def test_mobileface_embedding_shape(mobileface_model, mock_aligned_face):
|
||||
"""
|
||||
embedding = mobileface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
# MobileFace typically produces 512-dimensional embeddings
|
||||
# MobileFace get_embedding returns raw ONNX output with batch dimension
|
||||
assert embedding.shape[1] == 512, f'Expected 512-dim embedding, got {embedding.shape[1]}'
|
||||
assert embedding.shape[0] == 1, 'Embedding should have batch dimension of 1'
|
||||
|
||||
@@ -139,6 +145,7 @@ def test_mobileface_normalized_embedding(mobileface_model, mock_landmarks):
|
||||
|
||||
embedding = mobileface_model.get_normalized_embedding(mock_image, mock_landmarks)
|
||||
|
||||
assert embedding.shape == (512,), f'Expected shape (512,), got {embedding.shape}'
|
||||
norm = np.linalg.norm(embedding)
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f'Normalized embedding should have norm 1.0, got {norm}'
|
||||
|
||||
@@ -157,7 +164,7 @@ def test_sphereface_embedding_shape(sphereface_model, mock_aligned_face):
|
||||
"""
|
||||
embedding = sphereface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
# SphereFace typically produces 512-dimensional embeddings
|
||||
# SphereFace get_embedding returns raw ONNX output with batch dimension
|
||||
assert embedding.shape[1] == 512, f'Expected 512-dim embedding, got {embedding.shape[1]}'
|
||||
assert embedding.shape[0] == 1, 'Embedding should have batch dimension of 1'
|
||||
|
||||
@@ -170,10 +177,50 @@ def test_sphereface_normalized_embedding(sphereface_model, mock_landmarks):
|
||||
|
||||
embedding = sphereface_model.get_normalized_embedding(mock_image, mock_landmarks)
|
||||
|
||||
assert embedding.shape == (512,), f'Expected shape (512,), got {embedding.shape}'
|
||||
norm = np.linalg.norm(embedding)
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f'Normalized embedding should have norm 1.0, got {norm}'
|
||||
|
||||
|
||||
# EdgeFace Tests
|
||||
def test_edgeface_initialization(edgeface_model):
|
||||
"""Test that the EdgeFace model initializes correctly."""
|
||||
assert edgeface_model is not None, 'EdgeFace model initialization failed.'
|
||||
|
||||
|
||||
def test_edgeface_embedding_shape(edgeface_model, mock_aligned_face):
|
||||
"""Test that EdgeFace produces embeddings with the correct shape."""
|
||||
embedding = edgeface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
assert embedding.shape[1] == 512, f'Expected 512-dim embedding, got {embedding.shape[1]}'
|
||||
assert embedding.shape[0] == 1, 'Embedding should have batch dimension of 1'
|
||||
|
||||
|
||||
def test_edgeface_normalized_embedding(edgeface_model, mock_landmarks):
|
||||
"""Test that EdgeFace normalized embeddings have unit length."""
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
|
||||
embedding = edgeface_model.get_normalized_embedding(mock_image, mock_landmarks)
|
||||
|
||||
assert embedding.shape == (512,), f'Expected shape (512,), got {embedding.shape}'
|
||||
norm = np.linalg.norm(embedding)
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f'Normalized embedding should have norm 1.0, got {norm}'
|
||||
|
||||
|
||||
def test_edgeface_embedding_dtype(edgeface_model, mock_aligned_face):
|
||||
"""Test that EdgeFace embeddings have the correct data type."""
|
||||
embedding = edgeface_model.get_embedding(mock_aligned_face)
|
||||
assert embedding.dtype == np.float32, f'Expected float32, got {embedding.dtype}'
|
||||
|
||||
|
||||
def test_edgeface_consistency(edgeface_model, mock_aligned_face):
|
||||
"""Test that the same input produces the same EdgeFace embedding."""
|
||||
embedding1 = edgeface_model.get_embedding(mock_aligned_face)
|
||||
embedding2 = edgeface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
assert np.allclose(embedding1, embedding2), 'Same input should produce same embedding'
|
||||
|
||||
|
||||
# Cross-model comparison tests
|
||||
def test_different_models_different_embeddings(arcface_model, mobileface_model, mock_aligned_face):
|
||||
"""
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for RetinaFace detector."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for SCRFD detector."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for UniFace type definitions (dataclasses)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for utility functions (compute_similarity, face_alignment, etc.)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||