Can't get the programs to work together
I have code that, using a webcam and cv2:
- moves the cursor with the index finger;
- adjusts the volume based on the distance between the index finger and the thumb.
I combined them into one program; on their own they are two standalone programs, one for the cursor and one for the volume, and each works fine separately.
HandTrackingModule.py code:
import math
import time
import cv2
import mediapipe as mp
class handDetector:
def __init__(self, mode=False, maxHands=6 , complexity=1, detectionCon=0.5, trackCon=0.5):
self.mode = mode
self.maxHands = maxHands
self.complexity = complexity
self.detectionCon = detectionCon
self.trackCon = trackCon
self.mpHands = mp.solutions.hands
self.hands = self.mpHands.Hands(self.mode, self.maxHands, self.complexity,
self.detectionCon, self.trackCon, )
self.mpDraw = mp.solutions.drawing_utils
self.tipIds = [4, 8, 12, 16, 20]
def findHands(self, img, draw=True):
imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
self.results = self.hands.process(imgRGB)
# print(results.multi_hand_landmarks)
if self.results.multi_hand_landmarks:
for handLms in self.results.multi_hand_landmarks:
if draw:
self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS)
return img
def findPosition(self, img, handNo=0, draw=True):
xList = []
yList = []
bbox = []
self.lmList = []
if self.results.multi_hand_landmarks:
myHand = self.results.multi_hand_landmarks[handNo]
for id, lm in enumerate(myHand.landmark):
# print(id, lm)
h, w, c = img.shape
cx, cy = int(lm.x * w), int(lm.y * h)
xList.append(cx)
yList.append(cy)
# print(id, cx, cy)
self.lmList.append([id, cx, cy])
if draw:
cv2.circle(img, (cx, cy), 5, (255, 0, 255), cv2.FILLED)
xmin, xmax = min(xList), max(xList)
ymin, ymax = min(yList), max(yList)
bbox = xmin, ymin, xmax, ymax
if draw:
cv2.rectangle(img, (xmin - 20, ymin - 20), (xmax + 20, ymax + 20), (0, 255, 0), 2)
return self.lmList, bbox
def fingersUp(self):
fingers = []
# Thumb
if self.lmList[self.tipIds[0]][1] > self.lmList[self.tipIds[0] - 1][1]:
fingers.append(1)
else:
fingers.append(0)
# Fingers
for id in range(1, 5):
if self.lmList[self.tipIds[id]][2] < self.lmList[self.tipIds[id] - 2][2]:
fingers.append(1)
else:
fingers.append(0)
# totalFingers = fingers.count(1)
return fingers
def findDistance(self, p1, p2, img, draw=True, r=15, t=3):
x1, y1 = self.lmList[p1][1:]
x2, y2 = self.lmList[p2][1:]
cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
if draw:
cv2.line(img, (x1, y1), (x2, y2), (255, 0, 255), t)
cv2.circle(img, (x1, y1), r, (255, 0, 255), cv2.FILLED)
cv2.circle(img, (x2, y2), r, (255, 0, 255), cv2.FILLED)
cv2.circle(img, (cx, cy), r, (0, 0, 255), cv2.FILLED)
length = math.hypot(x2 - x1, y2 - y1)
return length, img, [x1, y1, x2, y2, cx, cy]
def main():
pTime = 0
cTime = 0
cap = cv2.VideoCapture(0)
detector = handDetector()
while True:
success, img = cap.read()
img = detector.findHands(img)
lmList, bbox = detector.findPosition(img)
if len(lmList) != 0:
print(lmList[4])
cTime = time.time()
fps = 1 / (cTime - pTime)
pTime = cTime
cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3,
(255, 0, 255), 3)
cv2.imshow("Image", img)
cv2.waitKey(1)
if __name__ == "__main__":
main()
..........................................................................................
AIVirtualMouse.py:
import time
import cv2
import numpy as np
import pyautogui
import HandTrackingModule as htm
######################
wCam, hCam = 900, 720
frameR = 100 # Frame Reduction
smoothening = 7 # random value
######################
pTime = 0
plocX, plocY = 0, 0
clocX, clocY = 0, 0
cap = cv2.VideoCapture(0)
cap.set(3, wCam)
cap.set(4, hCam)
detector = htm.handDetector(maxHands=1)
wScr, hScr = pyautogui.size()
while True:
# Step1: Find the landmarks
success, img = cap.read()
img = detector.findHands(img)
lmList, bbox = detector.findPosition(img)
# Step2: Get the tip of the index and middle finger
if len(lmList) != 0:
x1, y1 = lmList[8][1:]
x2, y2 = lmList[12][1:]
# Step3: Check which fingers are up
fingers = detector.fingersUp()
cv2.rectangle(img, (frameR, frameR), (wCam - frameR, hCam - frameR),
(255, 0, 255), 2)
# Step4: Only Index Finger: Moving Mode
if fingers[1] == 1 and fingers[2] == 0:
# Step5: Convert the coordinates
x3 = np.interp(x1, (frameR, wCam - frameR), (0, wScr))
y3 = np.interp(y1, (frameR, hCam - frameR), (0, hScr))
# Step6: Smooth Values
clocX = plocX + (x3 - plocX) / smoothening
clocY = plocY + (y3 - plocY) / smoothening
# Step7: Move Mouse
pyautogui.moveTo(wScr - clocX, clocY)
cv2.circle(img, (x1, y1), 15, (255, 0, 255), cv2.FILLED)
plocX, plocY = clocX, clocY
# Step8: Both Index and middle are up: Clicking Mode
if fingers[1] == 1 and fingers[2] == 1:
# Step9: Find distance between fingers
length, img, lineInfo = detector.findDistance(8, 12, img)
# Step10: Click mouse if distance short
if length < 40:
cv2.circle(img, (lineInfo[4], lineInfo[5]), 15, (0, 255, 0), cv2.FILLED)
pyautogui.click()
# Step11: Frame rate
cTime = time.time()
fps = 1 / (cTime - pTime)
pTime = cTime
cv2.putText(img, str(int(fps)), (28, 58), cv2.FONT_HERSHEY_PLAIN, 3, (255, 8, 8), 3)
# Step12: Display
cv2.imshow("Image", img)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
VolumeHandControleAdvance.py:
.............................................................................................
import cv2
import time
import numpy as np
import HandTrackingModule as htm
import math
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume
################################
wCam, hCam = 640, 480
################################
cap = cv2.VideoCapture(0)
cap.set(3, wCam)
cap.set(4, hCam)
pTime = 0
detector = htm.handDetector(detectionCon=0.7, maxHands=1)
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(
IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))
# volume.GetMute()
# volume.GetMasterVolumeLevel()
volRange = volume.GetVolumeRange()
minVol = volRange[0]
maxVol = volRange[1]
vol = 0
volBar = 400
volPer = 0
area = 0
colorVol = (255, 0, 0)
while True:
success, img = cap.read()
# Find Hand
img = detector.findHands(img)
lmList, bbox = detector.findPosition(img, draw=True)
if len(lmList) != 0:
# Filter based on size
area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) // 100
# print(area)
if 250 < area < 1000:
# Find Distance between index and Thumb
length, img, lineInfo = detector.findDistance(4, 8, img)
# print(length)
# Convert Volume
volBar = np.interp(length, [50, 200], [400, 150])
volPer = np.interp(length, [50, 200], [0, 100])
# Reduce Resolution to make it smoother
smoothness = 10
volPer = smoothness * round(volPer / smoothness)
# Check fingers up
fingers = detector.fingersUp()
# print(fingers)
# If pinky is down set volume
if not fingers[4]:
volume.SetMasterVolumeLevelScalar(volPer / 100, None)
cv2.circle(img, (lineInfo[4], lineInfo[5]), 15, (0, 255, 0), cv2.FILLED)
colorVol = (0, 255, 0)
else:
colorVol = (255, 0, 0)
# Drawings
cv2.rectangle(img, (50, 150), (85, 400), (255, 0, 0), 3)
cv2.rectangle(img, (50, int(volBar)), (85, 400), (255, 0, 0), cv2.FILLED)
cv2.putText(img, f'{int(volPer)} %', (40, 450), cv2.FONT_HERSHEY_COMPLEX,
1, (255, 0, 0), 3)
cVol = int(volume.GetMasterVolumeLevelScalar() * 100)
cv2.putText(img, f'Vol Set: {int(cVol)}', (400, 50), cv2.FONT_HERSHEY_COMPLEX,
1, colorVol, 3)
# Frame rate
cTime = time.time()
fps = 1 / (cTime - pTime)
pTime = cTime
cv2.putText(img, f'FPS: {int(fps)}', (40, 50), cv2.FONT_HERSHEY_COMPLEX,
1, (255, 0, 0), 3)
cv2.imshow("Img", img)
cv2.waitKey(1)
..............................................................................................
run.py (this code runs AIVirtualMouse and VolumeHandControleAdvance together):
import time
from ctypes import cast, POINTER
import cv2
import numpy as np
import pyautogui
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume
import HandTrackingModule as htm
# Camera settings
wCam, hCam = 900, 720
frameR = 100 # Frame Reduction
smoothening = 7 # Smoothening factor for cursor movement
# Initialize variables
pTime = 0
plocX, plocY = 0, 0
clocX, clocY = 0, 0
# Capture video
cap = cv2.VideoCapture(0)
cap.set(3, wCam)
cap.set(4, hCam)
# Initialize hand detector
detector = htm.handDetector(maxHands=1, detectionCon=0.7)
# Get screen size
wScr, hScr = pyautogui.size()
# Audio setup
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))
volRange = volume.GetVolumeRange()
minVol, maxVol = volRange[0], volRange[1]
vol, volBar, volPer = 0, 400, 0
area = 0
colorVol = (255, 0, 0)
while True:
# Capture frame
success, img = cap.read()
# Find hand landmarks
img = detector.findHands(img)
lmList, bbox = detector.findPosition(img)
if len(lmList) != 0:
# Get coordinates of index and middle fingers
x1, y1 = lmList[8][1:]
x2, y2 = lmList[12][1:]
# Check which fingers are up
fingers = detector.fingersUp()
# Drawing rectangle for cursor movement area
cv2.rectangle(img, (frameR, frameR), (wCam - frameR, hCam - frameR), (255, 0, 255), 2)
# Cursor Movement Mode
if fingers[1] == 1 and fingers[2] == 0:
# Convert coordinates
x3 = np.interp(x1, (frameR, wCam - frameR), (0, wScr))
y3 = np.interp(y1, (frameR, hCam - frameR), (0, hScr))
# Smooth values
clocX = plocX + (x3 - plocX) / smoothening
clocY = plocY + (y3 - plocY) / smoothening
# Move mouse
pyautogui.moveTo(wScr - clocX, clocY)
cv2.circle(img, (x1, y1), 15, (255, 0, 255), cv2.FILLED)
plocX, plocY = clocX, clocY
# Clicking Mode
if fingers[1] == 1 and fingers[2] == 1:
length, img, lineInfo = detector.findDistance(8, 12, img)
if length < 40:
cv2.circle(img, (lineInfo[4], lineInfo[5]), 15, (0, 255, 0), cv2.FILLED)
pyautogui.click()
# Volume Control Mode
area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) // 100
if 250 < area < 1000:
length, img, lineInfo = detector.findDistance(4, 8, img)
# Convert length to volume
volBar = np.interp(length, [50, 200], [400, 150])
volPer = np.interp(length, [50, 200], [0, 100])
# Smooth out the volume changes
smoothness = 10
volPer = smoothness * round(volPer / smoothness)
# Change volume if pinky is down
if not fingers[4]:
volume.SetMasterVolumeLevelScalar(volPer / 100, None)
cv2.circle(img, (lineInfo[4], lineInfo[5]), 15, (0, 255, 0), cv2.FILLED)
colorVol = (0, 255, 0)
else:
colorVol = (255, 0, 0)
# Draw volume bar
cv2.rectangle(img, (50, 150), (85, 400), (255, 0, 0), 3)
cv2.rectangle(img, (50, int(volBar)), (85, 400), (255, 0, 0), cv2.FILLED)
cv2.putText(img, f'{int(volPer)} %', (40, 450), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 0), 3)
# Display current volume
cVol = int(volume.GetMasterVolumeLevelScalar() * 100)
cv2.putText(img, f'Vol Set: {int(cVol)}', (400, 50), cv2.FONT_HERSHEY_COMPLEX, 1, colorVol, 3)
# Calculate and display FPS
cTime = time.time()
fps = 1 / (cTime - pTime)
pTime = cTime
cv2.putText(img, f'FPS: {int(fps)}', (40, 50), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 0), 3)
# Display the image
cv2.imshow("Image", img)
# Exit on 'q' key press
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
I also have voice assistant code - ALPHA-MAIN-APP.py:
try:
import os
import json
import customtkinter
import time
import webbrowser
import logging
from num2word import word
import pyaudio
from vosk import Model, KaldiRecognizer, SetLogLevel
import torch
import sounddevice as sd
import keyboard
import numpy
import silero
import threading
from gigachat import GigaChat
from transliterate import translit
import re
from num2words import num2words
from pathlib import Path
import speech_recognition as sr
import codecs
except ImportError:
print('Не все библиотеки установлены.')
os.system(
'pip install num2word pyaudio vosk torch sounddevice keyboard silero numpy customtkinter gigachat transliterate num2words pathlib SpeechRecognition')
# Load saved settings
with codecs.open(Path('files/config_alpha.json').resolve(), 'r', 'utf-8') as data:
config = json.load(data)
data.close()
# Wake phrase
if config['wakeword'] == '' or config['wakeword'] == ' ':
wakeword = "альфа"
else:
wakeword = tuple(config['wakeword'].lower().replace(',', '').split())
# Text-to-speech voice
speaker = config['voice']
# How long commands are accepted without the wake phrase
time_wait = config['time']
# Speech synthesis model
model_id = config['sintez']
# Speech recognition model
if config['rasp'] == '0.22':
model = Model('vosk-model-small-ru-0.22')
elif config['rasp'] == '0.4':
model = Model('vosk-model-small-ru-0.4')
# Recognition backend
recognition = config['rasp_type']
# GigaChat API credentials
gc_api = config['gc_api']
# Lists of keywords to strip from a command or replace
to_replace = ['найди ', 'поищи', 'включи ', 'включить ', 'включил ', 'музыка ', 'музыку ', 'песня ', 'песню', 'видео ']
to_replace_write = ['напиши', 'введи']
to_replace_special = [['точка с запятой', ';'], ['запятая', ','], ['точка', '.'], ['дефис ', '-'], ['двоеточие', ':'],
['знак вопроса', '?'], ['восклицательный знак', '!']]
# Fixed settings
sample_rate = 48000
language = 'ru'
device = torch.device('cpu')
put_accent = True
put_yo = True
rec = KaldiRecognizer(model, 16000)
p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=8000)
stream.start_stream()
giga = GigaChat(credentials=gc_api, scope='GIGACHAT_API_PERS', verify_ssl_certs=False)
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
mic = sr.Microphone()
r = sr.Recognizer()
time_ = 0
# Speech recognition
def listen():
while True:
data = stream.read(4000, exception_on_overflow=False)
if (rec.AcceptWaveform(data)) and (len(data) > 0):
com_rec = json.loads(rec.Result())
if com_rec['text']:
yield com_rec['text']
# Speech synthesis
def speak(text):
logging.info('Ассистент: ' + text)
text = translit(text.replace('c', 'к'), 'ru').lower().replace('w', 'в').replace('x', 'кс')
num = re.findall(r'-?\d+\+?', text)
if num != []:
for i in num:
numt = num2words(int(i), lang='ru')
text = text.replace(i, numt)
audio = model.apply_tts(text, speaker=speaker, sample_rate=sample_rate, put_accent=put_accent, put_yo=put_yo)
sd.play(audio, sample_rate)
time.sleep(len(audio) / sample_rate + 1.7)
sd.stop()
# Command functions
def open_(param):
try:
os.startfile(param)
except:
webbrowser.open(param)
def search(param):
zapros = com_rec.lower()
for i in wakeword:
zapros = zapros.replace(i + ' ', '')
zapros = zapros.lower().replace('найди ', '')
webbrowser.open('https://www.google.com/search?q=' + zapros)
def search_song(param):
zapros = com_rec.lower()
for i in wakeword:
zapros = zapros.replace(i + ' ', '')
for i in to_replace:
zapros = zapros.replace(i, '')
webbrowser.open('https://music.yandex.ru/search?text=' + zapros)
def search_video(param):
zapros = com_rec.lower()
for i in wakeword:
zapros = zapros.replace(i + ' ', '')
for i in to_replace:
zapros = zapros.replace(i, '')
webbrowser.open('https://www.youtube.com/results?search_query=' + zapros)
def browser(param):
eval(f'{param}()')
def new_tab():
keyboard.send('ctrl+t')
def incognito_tab():
keyboard.send('ctrl+shift+n')
def prev_tab():
keyboard.send('ctrl+shift+tab')
def next_tab():
keyboard.send('ctrl+tab')
def down():
keyboard.send('pagedown')
def up():
keyboard.send('pageup')
def end():
keyboard.send('end')
def home():
keyboard.send('home')
def tell(param):
zapros = com_rec.lower()
for i in wakeword:
zapros = zapros.replace(i + ' ', '')
response = giga.chat(zapros + '. Ответ должен быть очень кратким')
speak(response.choices[0].message.content)
def write_text(param):
text_to_write = com_rec.lower()
for i in wakeword:
text_to_write = text_to_write.replace(i + ' ', '')
for i in to_replace_write:
text_to_write = text_to_write.replace(i + ' ', '')
for i in to_replace_special:
text_to_write = text_to_write.replace(' ' + i[0], i[1])
keyboard.write(text_to_write + ' ')
def repeat_recognized_text(text):
speak(text)
def main_func(com):
exec = False
global time_
if com.startswith(wakeword) or time.time() - time_ < time_wait:
if com.startswith(wakeword):
time_ = time.time()
logging.info('Распознано: ' + com)
com = com.split()
# Category and parameter weights
with codecs.open(Path('files/we.json').resolve(), 'r', 'utf-8') as data_we:
we = json.load(data_we)
data_we.close()
# Compute the weights
for i in com:
try:
ind_kw = 0
for ind_kw in range(len(kw['main'][i])):
we['main'][kw['main'][i][ind_kw]['param']] += kw['main'][i][ind_kw]['weight']
except:
pass
ca = max(we['main'], key=we['main'].get)
for i in com:
try:
ind_kw = 0
for ind_kw in range(len(kw[ca][i])):
we[ca][kw[ca][i][ind_kw]['param']] += kw[ca][i][ind_kw]['weight']
exec = True
except:
pass
# Execute the command
if exec:
pa = max(we[ca], key=we[ca].get)
eval(f'{ca}(r"{pa}")')
else:
pass
# Keyword phrases
with codecs.open(Path('files/kw.json').resolve(), 'r', 'utf-8') as data_kw:
kw = json.load(data_kw)
data_kw.close()
# Load the speech synthesis model
model, _ = torch.hub.load(repo_or_dir='snakers4/silero-models', model='silero_tts', language=language, speaker=model_id)
model.to(device)
# Main loop
with mic as source:
if recognition == 'Google Speech Recognition':
r.adjust_for_ambient_noise(source, duration=1)
while True:
com_rec = r.listen(source)
try:
com_rec = r.recognize_google(com_rec, language='ru-RU')
main_func(com_rec.lower())
repeat_recognized_text(com_rec)  # repeat the recognized text back
except:
pass
else:
for com_rec in listen():
main_func(com_rec.lower())
repeat_recognized_text(com_rec)  # repeat the recognized text back
popitka1.py (I'm trying to use threads):
import threading
def run_assistant():
    ...  # here I pasted the ALPHA-MAIN-APP.py code
def run_camera():
    ...  # here goes a full copy of the run.py code
thread_assistant = threading.Thread(target=run_assistant)
thread_camera = threading.Thread(target=run_camera)
thread_assistant.start()
thread_camera.start()
thread_assistant.join()
thread_camera.join()
When I run the program above, both parts start (thanks for that, at least), but the voice assistant doesn't react to commands. It recognizes them but doesn't act on them.
In short, I want to add this voice assistant to run.py as well. I've tried, but it goes like this: either the assistant runs and the camera window never opens, or everything runs but the assistant doesn't react to commands.
I'd be very grateful if someone could help.
Answers (1):
I think I've found your bug: in main_func(com) you use global time_. When the code runs at module level this works, because time_ is defined as a global variable; but once you wrap the whole script inside a function (as in run_assistant), time_ becomes a local variable of that outer function, and global only reaches actual module-level variables. Here is code that reproduces the problem:
global_variable = 5
def main():
parent_variable = 3
def h():
global parent_variable, global_variable
parent_variable = 0
global_variable = 0
h()
print(f"global_variable: {global_variable} parent_variable: {parent_variable}")
main()
Result:
global_variable: 0 parent_variable: 3
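As an aside: if a nested function really needs to rebind a variable of the enclosing function, the statement for that is nonlocal, not global. A minimal sketch, reusing the same hypothetical names as above:
def main():
    parent_variable = 3
    def h():
        nonlocal parent_variable  # rebinds main()'s variable instead of creating a global one
        parent_variable = 0
    h()
    print(f"parent_variable: {parent_variable}")  # prints 0
main()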
To fix your problem, you can move everything up to the with mic as source: line out of the function (this shouldn't affect the thread). run_assistant would then look like this:
def run_assistant():
# Основной цикл
with mic as source:
if recognition == 'Google Speech Recognition':
r.adjust_for_ambient_noise(source, duration=1)
while True:
com_rec = r.listen(source)
try:
com_rec = r.recognize_google(com_rec, language='ru-RU')
main_func(com_rec.lower())
repeat_recognized_text(com_rec)  # repeat the recognized text back
except:
pass
else:
for com_rec in listen():
main_func(com_rec.lower())
repeat_recognized_text(com_rec)  # repeat the recognized text back
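With that change, the thread startup from popitka1.py can stay as it is. A minimal, self-contained sketch of the resulting layout (the loop bodies below are only placeholders standing in for the real ALPHA-MAIN-APP.py and run.py code, which is an assumption about how you split the files):
import threading
import time

def run_assistant():
    # placeholder for the "with mic as source:" loop from ALPHA-MAIN-APP.py
    while True:
        time.sleep(1)

def run_camera():
    # placeholder for the camera/mouse/volume loop from run.py
    while True:
        time.sleep(1)

thread_assistant = threading.Thread(target=run_assistant)
thread_camera = threading.Thread(target=run_camera)
thread_assistant.start()
thread_camera.start()
thread_assistant.join()
thread_camera.join()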