Tutorial: Digit Recognition Using OpenCV in Python

Digit recognition is a classic computer vision problem involving identifying numeric digits in an image. OpenCV provides tools for preprocessing images, extracting features, and training models for digit recognition. In this tutorial, we’ll implement digit recognition using OpenCV.

What You’ll Learn

Table of Contents

1. Introduction to Digit Recognition

Digit recognition uses machine learning and image processing to identify numeric digits in an image. OpenCV includes prebuilt tools for:

Data preprocessing: Resizing, binarization, etc.
Feature extraction: Contours, pixel values, etc.
Classification: Using k-Nearest Neighbors or other models.

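We’ll use the handwritten-digit sample image (digits.png) that ships with OpenCV. It contains 5,000 handwritten digits (500 per digit, each 20×20 pixels) and is similar in spirit to the MNIST benchmark, but it is a separate, smaller dataset.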

2. Loading and Preprocessing Data

Example: Load and Visualize the OpenCV Digits Dataset

import cv2
import numpy as np
import matplotlib.pyplot as plt

# Load the handwritten-digit sheet bundled with OpenCV's sample data.
# digits.png is a single grayscale image (not a text file, so np.loadtxt
# cannot parse it) laid out as a grid of 50 rows x 100 columns of 20x20-pixel
# cells, with the 500 cells of each digit stored consecutively.
sheet = cv2.imread(cv2.samples.findFile("digits.png"), cv2.IMREAD_GRAYSCALE)
if sheet is None:
    # cv2.imread reports an unreadable file by returning None
    raise FileNotFoundError("Could not read digits.png from the OpenCV samples data")

# Cut the sheet into cells: 50 horizontal strips, each split into 100 cells
cells = [np.hsplit(strip, 100) for strip in np.vsplit(sheet, 50)]
samples = np.array(cells, dtype=np.float32)  # shape (50, 100, 20, 20)
labels = np.repeat(np.arange(10), 500)  # 500 samples per digit

# Reshape the samples into a flat list of 20x20 images (row-major order keeps
# the samples aligned with the labels above)
samples = samples.reshape(-1, 20, 20)

# Display a random digit
index = np.random.randint(0, len(samples))
plt.imshow(samples[index], cmap="gray")
plt.title(f"Label: {labels[index]}")
plt.show()

3. Training a k-Nearest Neighbors (k-NN) Model

Example: Train a k-NN Model

# Preprocess data for training
samples = samples.reshape(-1, 400).astype(np.float32)  # Flatten 20x20 to 400 pixels

# The samples are ordered by digit (all 0s, then all 1s, ...). Slicing the
# first 3500 rows as-is would train on digits 0-6 only and leave nothing but
# 7-9 for testing. Shuffle samples and labels with the same permutation
# (fixed seed for repeatable results) so both splits contain every digit.
order = np.random.default_rng(42).permutation(len(samples))
samples = samples[order]
labels = labels[order]

# First 3500 shuffled samples are used for training; the rest are held out
train_data = samples[:3500]
train_labels = labels[:3500]

# Initialize k-NN and train (each row of train_data is one sample)
knn = cv2.ml.KNearest_create()
knn.train(train_data, cv2.ml.ROW_SAMPLE, train_labels)

4. Predicting Digits Using the Trained Model

Example: Test the k-NN Model

# Everything after the first 3500 samples is held out for evaluation
test_data, test_labels = samples[3500:], labels[3500:]

# Query the 3 nearest training samples for every held-out sample
ret, result, neighbors, dist = knn.findNearest(test_data, k=3)

# Accuracy is the percentage of predictions equal to the true label
matches = result.ravel() == test_labels
accuracy = matches.sum() / matches.size * 100
print(f"Accuracy: {accuracy:.2f}%")

5. Recognizing Digits in an Image

Example: Recognize Digits in a Custom Image

import cv2
import numpy as np

# Load a custom handwritten digits image
image = cv2.imread("digits_test.png", cv2.IMREAD_GRAYSCALE)
if image is None:
    # cv2.imread reports a missing/unreadable file by returning None; fail
    # here with a clear message instead of inside cv2.resize
    raise FileNotFoundError("Could not read digits_test.png")
image = cv2.resize(image, (400, 400))  # Resize for consistent preprocessing

# Preprocess the image: smooth, then binarize with inverted output so the
# digit strokes become white on a black background
blur = cv2.GaussianBlur(image, (5, 5), 0)
thresh = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                cv2.THRESH_BINARY_INV, 11, 2)

# Find contours of digits (outermost contours only)
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Train a k-NN model; this reuses train_data / train_labels built in the
# training section, so that section must have been run first
knn = cv2.ml.KNearest_create()
knn.train(train_data, cv2.ml.ROW_SAMPLE, train_labels)

# Annotate a 3-channel copy: on the single-channel grayscale image the green
# colour (0, 255, 0) would be drawn using only its first component, i.e. black
annotated = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)

# Iterate through contours
for contour in contours:
    x, y, w, h = cv2.boundingRect(contour)

    if h <= 20:  # Ignore small contours
        continue

    # Crop the candidate digit and bring it to the 20x20 -> 400-value layout
    # the model was trained on
    roi = thresh[y:y + h, x:x + w]
    roi = cv2.resize(roi, (20, 20))
    roi = roi.reshape(1, 400).astype(np.float32)

    # Predict the digit
    ret, result, neighbors, dist = knn.findNearest(roi, k=3)
    digit = int(result[0, 0])

    # Draw the result; clamp the label so it stays visible for boxes that
    # touch the top edge of the image
    cv2.rectangle(annotated, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.putText(annotated, str(digit), (x, max(y - 10, 20)),
                cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

# Display the results
cv2.imshow("Digit Recognition", annotated)
cv2.waitKey(0)
cv2.destroyAllWindows()

6. Practical Examples

6.1 Recognizing Digits in Real-Time Video

import cv2
import numpy as np

# Open webcam
# NOTE: this snippet uses `knn`, the k-NN model trained in the earlier sections.
cap = cv2.VideoCapture(0)

try:
    while True:
        grabbed, frame = cap.read()
        if not grabbed:  # No frame available (camera missing or stream ended)
            break

        # Convert to grayscale, smooth, and binarize with inverted output so
        # dark strokes become white foreground
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        blur = cv2.GaussianBlur(gray, (5, 5), 0)
        thresh = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                       cv2.THRESH_BINARY_INV, 11, 2)

        # Find contours (outermost contours only)
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            if h <= 50:  # Ignore small contours
                continue

            # Crop the candidate digit and bring it to the 1x400 float32 row
            # layout used for prediction
            roi = thresh[y:y + h, x:x + w]
            roi = cv2.resize(roi, (20, 20))
            roi = roi.reshape(1, 400).astype(np.float32)

            # Predict the digit; only the per-sample result is needed, and
            # discarding the rest avoids shadowing the frame-grab flag
            _, result, _, _ = knn.findNearest(roi, k=3)
            digit = int(result[0, 0])

            # Draw the result; clamp the label so it stays inside the frame
            # for boxes that touch the top edge
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            cv2.putText(frame, str(digit), (x, max(y - 10, 20)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

        cv2.imshow("Digit Recognition", frame)

        # Quit when the user presses 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if an error was
    # raised while processing a frame
    cap.release()
    cv2.destroyAllWindows()

6.2 Training with Custom Handwritten Digits

import cv2
import numpy as np

# Load your custom digit images
digits = []
labels = []

for i in range(10):  # Assume 10 folders for digits 0-9
    for j in range(100):  # 100 samples per digit
        filepath = f"custom_dataset/{i}/{j}.png"
        image = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread returns None for a missing or unreadable file; name
            # the offending path instead of failing later inside cv2.resize
            raise FileNotFoundError(f"Could not read training image: {filepath}")
        image = cv2.resize(image, (20, 20))
        digits.append(image.flatten())  # 20x20 image -> 400-value feature row
        labels.append(i)  # The folder name is the digit's label

# Convert to NumPy arrays (float32 features, int32 labels)
digits = np.array(digits, dtype=np.float32)
labels = np.array(labels, dtype=np.int32)

# Train k-NN model (each row of `digits` is one sample)
knn = cv2.ml.KNearest_create()
knn.train(digits, cv2.ml.ROW_SAMPLE, labels)
print("Custom k-NN model trained!")

7. Summary

Key Functions

cv2.ml.KNearest_create(): Initialize a k-NN model.
cv2.findContours(): Find contours for segmentation.
cv2.adaptiveThreshold(): Binarize images for easier processing.

Best Practices

Preprocess images by resizing, binarizing, and normalizing.
Train models with clean and balanced datasets for better accuracy.
Use real-time digit recognition for applications like number plate recognition.

By mastering these techniques, you can build robust digit recognition systems using OpenCV.

Digit Recognition