Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 150 additions & 0 deletions AI_MouseHandGuestures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
import cv2
import numpy as np
import mediapipe as mp
import pyautogui
import pyttsx3
import time


class VirtualMouse:
    """Drive the system mouse from hand gestures seen by the default webcam.

    Each camera frame is mirrored, passed to MediaPipe Hands, and the
    resulting landmarks are turned into gesture flags (pointer movement,
    left/right click, double click, scroll) which are then executed with
    PyAutoGUI.

    Landmark indices used below follow the MediaPipe hand model:
    0 wrist, 1-4 thumb (4 = tip), 6/8 index PIP/tip, 10/12 middle PIP/tip,
    14/16 ring PIP/tip.
    """

    WINDOW_NAME = 'Virtual Mouse'
    ESC_KEY = 27

    # Distances are measured in the scaled landmark space
    # (normalised landmark * camera_width / camera_height).
    PINCH_DISTANCE = 40   # two points closer than this count as touching
    THUMB_OFFSET = 50     # thumb tip must be this far left of the wrist to move
    SCROLL_AMOUNT = 10    # passed to pyautogui.scroll(); positive scrolls up

    # (landmark index, BGR colour) of the filled circles drawn on the preview.
    _MARKERS = (
        (8, (255, 0, 0)),       # index tip  - blue
        (12, (255, 0, 0)),      # middle tip - blue
        (4, (203, 192, 255)),   # thumb tip  - pink (OpenCV colours are BGR)
        (16, (0, 255, 0)),      # ring tip   - green
    )

    def __init__(self, camera_width=1280, camera_height=720, speed_factor=2,
                 double_click_interval=0.3):
        """Set up gesture state, MediaPipe, the speech engine and the camera.

        Args:
            camera_width: Width of the coordinate space the normalised
                landmarks are scaled into. It does not configure the
                capture device.
            camera_height: Height of that coordinate space.
            speed_factor: Multiplier applied to the scaled thumb position
                before it is used as the pointer position.
            double_click_interval: Maximum number of seconds between the
                starts of two pinches for them to count as a double click.
        """
        self.camera_width = camera_width
        self.camera_height = camera_height
        self.speed_factor = speed_factor
        self.double_click_interval = double_click_interval
        self.screen_width, self.screen_height = pyautogui.size()

        # Gesture flags, refreshed by process_gestures() on every frame.
        self.left_click_gesture = False
        self.right_click_gesture = False
        self.scroll_gesture = False
        self.mouse_movement_gesture = False
        self.double_click_gesture = False
        # Start time of the last unpaired pinch; -inf means "none pending".
        self.last_click_time = float("-inf")

        # Previous-frame state, used to act on a gesture once when it starts
        # instead of on every frame for as long as it is held.
        self._pinch_active = False
        self._left_click_held = False
        self._right_click_held = False

        # Initialize MediaPipe and speech engine
        self.mp_drawing = mp.solutions.drawing_utils
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(max_num_hands=1)
        self.engine = pyttsx3.init()

        # Camera setup
        self.cap = cv2.VideoCapture(0)

    def speak(self, text):
        """Speak ``text`` with pyttsx3 and block until it has been said."""
        self.engine.say(text)
        self.engine.runAndWait()

    def process_gestures(self, hand_landmarks, now=None):
        """Update the gesture flags from one set of hand landmarks.

        Args:
            hand_landmarks: Object whose ``landmark`` sequence holds items
                with normalised ``x`` and ``y`` attributes.
            now: Timestamp in seconds used for double-click timing. Defaults
                to ``time.monotonic()``; pass a value to make timing
                deterministic.
        """
        if now is None:
            now = time.monotonic()

        scale = np.array([self.camera_width, self.camera_height])
        finger_coords = np.array(
            [[lm.x, lm.y] for lm in hand_landmarks.landmark]
        ) * scale
        wrist = finger_coords[0]
        thumb_tip = finger_coords[4]
        index_tip = finger_coords[8]
        middle_tip = finger_coords[12]

        # Pointer movement: thumb tip clearly to the left of the wrist.
        self.mouse_movement_gesture = bool(
            thumb_tip[0] < wrist[0] - self.THUMB_OFFSET
        )

        # Left click: index tip above its PIP joint while the middle tip is
        # below its own PIP joint (image y grows downwards).
        self.left_click_gesture = bool(
            index_tip[1] < finger_coords[6][1]
            and middle_tip[1] > finger_coords[10][1]
        )

        # Right click: thumb tip touching the index tip while every thumb
        # landmark (1-4) is away from the wrist.
        thumb_touches_index = (
            np.linalg.norm(thumb_tip - index_tip) < self.PINCH_DISTANCE
        )
        thumb_away_from_wrist = all(
            np.linalg.norm(finger_coords[i] - wrist) > self.PINCH_DISTANCE
            for i in range(1, 5)
        )
        self.right_click_gesture = bool(
            thumb_touches_index and thumb_away_from_wrist
        )

        # Double click: two separate index/middle pinches that start within
        # double_click_interval seconds. Only the frame on which a pinch
        # starts is considered, so holding the pinch cannot retrigger it.
        pinched = bool(
            np.linalg.norm(index_tip - middle_tip) < self.PINCH_DISTANCE
        )
        pinch_started = pinched and not self._pinch_active
        self._pinch_active = pinched
        self.double_click_gesture = False
        if pinch_started:
            if now - self.last_click_time < self.double_click_interval:
                self.double_click_gesture = True
                # Both pinches are consumed; a third one starts a new pair.
                self.last_click_time = float("-inf")
            else:
                self.last_click_time = now

        # Scroll: ring tip below its PIP joint.
        self.scroll_gesture = bool(
            finger_coords[16][1] > finger_coords[14][1]
        )

    def perform_actions(self):
        """Issue the PyAutoGUI actions for the current gesture flags.

        Left and right clicks fire once when their gesture starts; the
        double click fires on the single frame process_gestures() flags it;
        scrolling repeats on every frame the scroll gesture is held.
        """
        if self.left_click_gesture and not self._left_click_held:
            pyautogui.click()
        self._left_click_held = self.left_click_gesture

        if self.right_click_gesture and not self._right_click_held:
            pyautogui.click(button='right')
        self._right_click_held = self.right_click_gesture

        if self.double_click_gesture:
            pyautogui.doubleClick()

        if self.scroll_gesture:
            # A positive amount scrolls up in PyAutoGUI.
            pyautogui.scroll(self.SCROLL_AMOUNT)

    def _screen_position(self, thumb_coords):
        """Return the (x, y) pointer target for scaled thumb coordinates.

        The coordinates are multiplied by ``speed_factor`` and clamped to
        the screen rectangle.
        """
        x = max(0, min(thumb_coords[0] * self.speed_factor,
                       self.screen_width - 1))
        y = max(0, min(thumb_coords[1] * self.speed_factor,
                       self.screen_height - 1))
        return x, y

    def move_mouse(self, thumb_coords):
        """Move the pointer to the screen position derived from the thumb."""
        new_x, new_y = self._screen_position(thumb_coords)
        pyautogui.moveTo(new_x, new_y)

    def _release_gestures(self):
        """Clear every gesture flag and the previous-frame gesture state."""
        self.left_click_gesture = False
        self.right_click_gesture = False
        self.scroll_gesture = False
        self.mouse_movement_gesture = False
        self.double_click_gesture = False
        self._pinch_active = False
        self._left_click_held = False
        self._right_click_held = False

    def process_frame(self, image):
        """Detect the hand in one frame, act on its gestures, annotate it.

        Args:
            image: BGR frame as returned by ``cv2.VideoCapture.read()``.

        Returns:
            The horizontally mirrored frame with the hand skeleton and the
            fingertip markers drawn on it.
        """
        display_image = cv2.flip(image, 1)
        image_rgb = cv2.cvtColor(display_image, cv2.COLOR_BGR2RGB)
        results = self.hands.process(image_rgb)

        if not results.multi_hand_landmarks:
            # No hand in view: drop stale flags so a gesture that is still
            # shown when the hand comes back counts as newly started.
            self._release_gestures()
            return display_image

        # Markers are placed with the real frame size so they line up with
        # the image even when it differs from camera_width/camera_height.
        frame_height, frame_width = display_image.shape[:2]

        for hand_landmarks in results.multi_hand_landmarks:
            self.mp_drawing.draw_landmarks(
                display_image, hand_landmarks, self.mp_hands.HAND_CONNECTIONS
            )

            self.process_gestures(hand_landmarks)

            # Perform actions based on gestures
            self.perform_actions()

            # Move the mouse
            if self.mouse_movement_gesture:
                thumb = hand_landmarks.landmark[4]
                self.move_mouse(np.array([
                    thumb.x * self.camera_width,
                    thumb.y * self.camera_height,
                ]))

            # Draw circles on selected fingertips for visualization
            for index, colour in self._MARKERS:
                point = hand_landmarks.landmark[index]
                centre = (int(point.x * frame_width),
                          int(point.y * frame_height))
                cv2.circle(display_image, centre, 10, colour, -1)

        return display_image

    def run(self):
        """Run the capture/recognise/act loop until Esc is pressed.

        The camera, the MediaPipe hand tracker and the preview window are
        released even when the loop ends because of an exception.
        """
        if not self.cap.isOpened():
            print("Could not open the camera.")
            self.cap.release()
            return

        self.speak("Press Esc key to exit the program.")

        try:
            while self.cap.isOpened():
                success, image = self.cap.read()
                if success:
                    cv2.imshow(self.WINDOW_NAME, self.process_frame(image))
                else:
                    print("Ignoring empty camera frame.")

                # Polled on every iteration, including failed reads, so Esc
                # can always end the loop.
                if (cv2.waitKey(1) & 0xFF) == self.ESC_KEY:
                    break
        finally:
            self.cap.release()
            cv2.destroyAllWindows()
            self.hands.close()


if __name__ == "__main__":
    # Script entry point: build the controller with its default settings and
    # hand control to its main loop.
    VirtualMouse().run()
141 changes: 141 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
# AI Virtual Mouse

## This Project is Done By:
- **Tajanpure Kiran Ambadas**

---

## Project Description
The **AI Virtual Mouse** project aims to create a touchless mouse interface that allows users to control their computers using hand gestures and movements. This innovative system uses a camera to monitor hand movements and translates those movements into actions on the computer screen, such as moving the mouse pointer or clicking.

The core of this project involves:
1. **Capturing Real-Time Visuals**: Utilizing a webcam to record hand movements.
2. **Processing Visual Data**: Using advanced computer vision techniques with libraries like OpenCV and MediaPipe to identify key landmarks on the hand (fingertips, joints, etc.).
3. **Recognizing Gestures**: Translating gestures into mouse actions, like moving the cursor or performing clicks.

---

## Key Components

### 1) Camera
Captures real-time images or videos of the user's hand movements.
### 2) Computer Vision
Analyzes the captured visuals to detect and interpret hand gestures using machine learning models.
### 3) Software
Implements the system using programming languages like Python, alongside libraries such as OpenCV and MediaPipe.

---

## Why We Selected This Project
1. **Innovation**: Utilizes cutting-edge technologies like computer vision and AI.
2. **Accessibility**: Provides a hands-free control system, aiding individuals who cannot use a traditional mouse.
3. **Practical Use**: Finds applications in gaming, healthcare, and other environments where physical contact is limited.

---

## Challenges Faced
1. **Gesture Recognition Accuracy**: Achieving high precision in detecting hand gestures in different lighting conditions.
2. **Hardware Limitations**: Ensuring the system works efficiently on devices with low processing power.
3. **Real-time Performance**: Reducing lag between gesture input and system response.
4. **Gesture Overlap**: Preventing confusion when similar gestures are performed consecutively.

### How We Overcame These Challenges
- Fine-tuned the gesture detection models to work in variable lighting.
- Optimized the code to ensure smooth performance on lower-end systems.
- Implemented gesture differentiation algorithms to avoid overlaps.

---


## Security Features
To ensure only authorized users can access the AI Virtual Mouse, several security features are included:
- **User Authentication**: Requires login credentials before using the system.
- **Gesture-Based Authentication**: Employs specific gestures as an additional layer of security.
- **Access Logs**: Maintains a record of usage to monitor unauthorized access.

---

## Libraries Used
1. **NumPy**: Handles large datasets and performs numeric and scientific computations.
2. **OpenCV (cv2)**: Processes images and videos to detect hand gestures.
3. **MediaPipe**: Tracks hand movements in real-time, recognizing landmarks and gestures.
4. **PyAutoGUI**: Controls the mouse and keyboard, enabling hand gestures to perform actions like moving and clicking.
5. **pyttsx3**: Converts text to speech; properties such as voice, rate, and volume can be set, and text can be spoken aloud or saved as audio.
---

## Example of Implementation
Below is an example image of how the AI Virtual Mouse works in real time:

**Steps to Execute**
- step 1)
## install all libraries:
```
pip install pyttsx3
pip install opencv-python
pip install mediapipe
pip install pyautogui
```
Make sure your Python version satisfies the libraries' requirements (we are using Python 3.12):
```
python --version
```
- step 2)
-
![Screenshot (97)](https://github.com/user-attachments/assets/6f64a456-cccd-4be9-80da-73ca50f1ae6d)

- step 3)
-
![Screenshot (93)](https://github.com/user-attachments/assets/23e7f71a-1d8c-431e-83b4-07ad8e20039f)
- step 4)
-
![Screenshot 2025-01-01 122819](https://github.com/user-attachments/assets/285dd0cb-b049-4667-8474-7e688b33bd87)
- step 5)
-
![Screenshot (94)](https://github.com/user-attachments/assets/f034a790-a512-4abf-9112-6f4ba0be4a94)
![Screenshot (95)](https://github.com/user-attachments/assets/dd968ad7-548d-426b-9901-43ae99bafb80)
![Screenshot (99)](https://github.com/user-attachments/assets/de8d35e5-f4b6-4725-baba-60ccf02eaf90)

This is the additional GUI feature we added to the project.

---
## Applications
- **Gaming**: Hands-free control enhances gaming experiences.
- **Healthcare**: Useful in sterile environments where touch is not allowed.
- **Assistive Technology**: Aids individuals with physical disabilities.

---

## Contact
For inquiries, feel free to reach out to us:
- **Email**: kirantajanpure48@gmail.com
- **GitHub**: [https://github.com/kiran28092003](https://github.com/kiran28092003)
- **LinkedIn**: [https://www.linkedin.com/in/kiran-tajanpure-a7509225b](https://www.linkedin.com/in/kiran-tajanpure-a7509225b)

---

## Future Scope

1. **Gesture Expansion**
Introduce a wider range of gestures for advanced mouse operations, such as drag-and-drop, zooming, and scrolling.

2. **Voice Integration**
Combine gesture recognition with voice commands for a hybrid hands-free interface.

3. **Cross-Platform Compatibility**
Extend support to various operating systems like macOS, Linux, and mobile platforms.

4. **Improved Accuracy**
Enhance gesture recognition algorithms to work effectively in diverse lighting conditions and backgrounds.

5. **AR/VR Integration**
Use the AI Virtual Mouse in augmented reality (AR) and virtual reality (VR) environments for immersive user experiences.

6. **Accessibility Enhancements**
Adapt the system further for individuals with disabilities, making it more inclusive and accessible.

7. **Customizable UI**
Provide users with the ability to configure gestures and corresponding actions according to their preferences.

8. **Disaster Management Integration**
Utilize AI Virtual Mouse technology in disaster response scenarios, enabling hands-free control of systems for monitoring, communication, and rescue operations in hazardous environments.

---
Binary file added Screenshot (93).png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading