Mediapipe
Among deep-learning-based libraries, Mediapipe stands out for the variety of features it offers on top of video input.
https://developers.google.com/mediapipe/solutions
Using it, I built a program that lets you control the mouse by moving your hand in the air in front of a camera.
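Before the full script, here is a minimal sketch of how Mediapipe's Hands solution exposes hand landmarks: it prints the index fingertip position for each frame. The camera index 0 is an assumption for the default webcam; coordinates come back normalized to [0, 1], which is what the full script relies on.

import cv2
import mediapipe as mp

mp_hands = mp.solutions.hands

# Minimal sketch: print the index fingertip position per frame.
# Assumes camera index 0 (default webcam).
camera = cv2.VideoCapture(0)
with mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.5) as hands:
    while camera.isOpened():
        success, frame = camera.read()
        if not success:
            continue
        # Mediapipe expects RGB input; OpenCV delivers BGR.
        results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        if results.multi_hand_landmarks:
            tip = results.multi_hand_landmarks[0].landmark[8]  # index fingertip
            print(f"index fingertip: x={tip.x:.3f}, y={tip.y:.3f}")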
import mediapipe as mp
import cv2
import numpy as np
import math
import pyautogui
import time

# Return the Euclidean distance between (x1, y1) and (x2, y2).
def dist_func(x1, y1, x2, y2):
    return math.sqrt(math.pow(x1 - x2, 2) + math.pow(y1 - y2, 2))

# Return the midpoint of (x1, y1) and (x2, y2).
def mid_point(x1, y1, x2, y2):
    return (x1 + x2) / 2., (y1 + y2) / 2.

# Move the mouse by the change in the tracked point, scaled by sensitivity.
def update_mouse_pos(befMousePos, befPointPos, curPointPos, duration, sen=1000):
    x = befMousePos[0] + (curPointPos[0] - befPointPos[0]) * sen
    y = befMousePos[1] + (curPointPos[1] - befPointPos[1]) * sen
    pyautogui.moveTo(x, y, duration)

# Return True if the hand is in a pinch pose:
# index finger extended, middle/ring/pinky curled.
def pinch_gesture(landmark):
    fingers = [False for i in range(5)]
    for i in range(1, 5):
        # A fingertip (i*4+4) closer to the wrist (0) than its PIP joint
        # (i*4+2) means the finger is curled.
        dist_a = dist_func(landmark[i*4+4].x, landmark[i*4+4].y, landmark[0].x, landmark[0].y)
        dist_b = dist_func(landmark[i*4+2].x, landmark[i*4+2].y, landmark[0].x, landmark[0].y)
        if dist_a < dist_b:
            fingers[i] = True
    return not fingers[1] and fingers[2] and fingers[3] and fingers[4]

mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

camera = cv2.VideoCapture(1)  # camera index; use 0 for the default webcam
mousePos = pyautogui.position()
befMousePos = [mousePos.x, mousePos.y]
befPointPos = [0.5, 0.5]
befTime = time.time()
befHandResult = False  # whether a hand was seen last frame (set but not otherwise used here)

with mp_hands.Hands(
        max_num_hands=1,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5) as hands:
    while camera.isOpened():
        success, image = camera.read()
        if not success:
            continue
        # Mirror the frame and convert BGR -> RGB for Mediapipe.
        image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
        results = hands.process(image)
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                thumb = hand_landmarks.landmark[4]
                index = hand_landmarks.landmark[8]
                if pinch_gesture(hand_landmarks.landmark):
                    # Move the mouse by how far the thumb-index midpoint moved.
                    curPointPos = mid_point(thumb.x, thumb.y, index.x, index.y)
                    update_mouse_pos(befMousePos, befPointPos, curPointPos, time.time() - befTime)
                    # Click when the thumb and index fingertips touch.
                    distTI = dist_func(thumb.x, thumb.y, index.x, index.y)
                    if distTI < 0.05:
                        pyautogui.click()
                    befHandResult = True
                befPointPos = mid_point(thumb.x, thumb.y, index.x, index.y)
                mp_drawing.draw_landmarks(
                    image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
        else:
            befHandResult = False
        befTime = time.time()
        mousePos = pyautogui.position()
        befMousePos = [mousePos.x, mousePos.y]
        #cv2.imshow('image', image)
        #if cv2.waitKey(1) == ord('q'):
        #    break
The mouse pointer can be moved only while the hand holds the pinch pose, and a click is triggered by a pinching motion that brings the thumb and index fingertips together.
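One caveat: as written, pyautogui.click() fires on every frame while the fingertips stay within the threshold, so a single pinch can register as several clicks. Below is a minimal sketch of one way to debounce it with a previous-frame flag; the edge-triggered logic is my own refinement rather than part of the original script, and the 0.05 threshold is carried over from above.

import pyautogui

# Edge-triggered click: fire only on the frame where the fingertips
# first come together, not on every frame they stay together.
befPinched = False  # was the thumb-index distance below the threshold last frame?

def maybe_click(distTI, threshold=0.05):
    global befPinched
    pinched = distTI < threshold
    if pinched and not befPinched:
        pyautogui.click()  # fires once per pinch, not once per frame
    befPinched = pinched

Note also that pyautogui's fail-safe is enabled by default: slamming the pointer into a screen corner raises a FailSafeException, which is a handy escape hatch while testing a script that controls the mouse.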