# Noise Detector
# noise_detector.py
# Threshold / Sliding window
# https://raw.githubusercontent.com/jeysonmc/python-google-speech-scripts/master/stt_google.py
# WebSocket streaming:
# https://gist.github.com/fopina/3cefaed1b2d2d79984ad7894aef39a68
import pyaudio
import wave
import audioop
import subprocess
import os
import time
import math
import struct
import threading
import io
import numpy as np
from collections import deque
from lock_manager import Lock_Manager
from util import Util
class Noise_Detector(threading.Thread):
    """
    Background thread that listens to the microphone and records noise.

    The detector reads fixed-size chunks from a PyAudio input stream, keeps a
    sliding window of their RMS levels and starts recording once enough chunks
    in that window exceed a threshold calibrated at start-up. A finished
    recording is written to the `archive` directory next to this file as a
    32-bit float WAV file, with a short pre-roll of audio captured just before
    the noise was detected.
    """

    def __init__(self):
        """
        Open the microphone, calibrate the silence threshold and set defaults
        """
        threading.Thread.__init__(self)
        self.name = self.__class__.__name__

        self.FORMAT = pyaudio.paFloat32
        self.RATE = 48000  # Sample rate in Hz (frames per second)
        self.CHUNK_SIZE = 8192  # Frames read from the mic per stream.read() call
        # Whole chunks per second (48000 / 8192 rounds down to 5)
        self.CHUNKS_PER_SEC = math.floor(self.RATE / self.CHUNK_SIZE)
        self.CHANNELS = 1
        # Seconds of audio cached for prepending to records to prevent chopped
        # phrases (history length + observer length = min record length)
        self.HISTORY_LENGTH = 2
        self.OBSERVER_LENGTH = 5  # Time in seconds to be observed for noise
        self.NOTIFICATION_LIMIT = 1  # Seconds before a notification is sent
        self.LIMIT_RECODING = 100  # Maximum number of chunks per recording
        self.CURRENT_RECODING_TIME = 0  # Chunks captured in the current recording
        # Newest audio files left in the archive by delete(); older ones are removed
        self.REMAIN_RECORDING_FILES = 3
        # Chunks in the observer window that must exceed the threshold
        # before a recording is triggered
        self.RECODING_OVER_THRESHOLD = 5

        self.archive = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'archive')
        self.current_file = None
        self.chunk = None
        self.record = []  # Stores audio chunks
        self.notified = False  # If we already sent a notification

        self.audio = pyaudio.PyAudio()
        self.stream = self.get_stream()
        self.threshold = self.determine_threshold()
        self.lock_manager = Lock_Manager("noise")
        self.detected_at = None

    def __del__(self):
        """
        Release the audio resources and the lock
        """
        # __init__ may have failed part-way (e.g. no microphone), in which case
        # some of these attributes were never assigned; look them up defensively
        # so the finalizer does not raise AttributeError.
        stream = getattr(self, "stream", None)
        if stream:
            # Stop recording
            stream.close()
        audio = getattr(self, "audio", None)
        if audio:
            audio.terminate()
        lock_manager = getattr(self, "lock_manager", None)
        if lock_manager is not None:
            # Remove lock if exists
            lock_manager.remove()

    def get_stream(self):
        """
        Open audio stream
        @return PyAudio
        """
        return self.audio.open(
            format=self.FORMAT,
            channels=self.CHANNELS,
            rate=self.RATE,
            input=True,
            frames_per_buffer=self.CHUNK_SIZE
        )

    def determine_threshold(self, samples=50, factor=1.7):
        """
        Determine threshold noise intensity using RMS
        Anything below the threshold is considered silence
        @param int samples Number of chunks to average over
        @param float factor Multiplier applied to the average RMS
        @return float
        """
        Util.log(self.name, "Determining threshold...")
        levels = []
        for _ in range(samples):
            # Same overflow handling as in run(): a dropped frame during
            # calibration should not abort start-up.
            block = self.stream.read(self.CHUNK_SIZE, exception_on_overflow=False)
            levels.append(self.get_rms(block))
        # Set threshold to the average level scaled by `factor`
        # (70% above average with the default of 1.7)
        threshold = (sum(levels) / len(levels)) * factor
        Util.log(self.name, "Setting threshold to: " + str(threshold))
        return threshold

    def get_rms(self, block):
        """
        Calculate Root Mean Square (noise level) for audio chunk
        The chunk is interpreted as 32-bit float samples.
        @param bytes block
        @return float
        """
        # np.float was removed in NumPy 1.24; float64 is the type it aliased
        samples = np.frombuffer(block, np.float32).astype(np.float64)
        if samples.size == 0:
            # Nothing to measure; treat as silence instead of dividing by zero
            return 0.0
        return np.sqrt(np.mean(np.square(samples)))

    def start_recording(self):
        """
        Setup the recorder
        """
        # The file is named after the time the noise was detected
        self.current_file = os.path.join(self.archive, self.detected_at + ".wav")
        Util.log(self.name, "Noise detected! Recording...")

    def stop_recording(self):
        """
        Reset variables to default
        """
        self.current_file = None
        self.detected_at = None
        self.notified = False
        self.record = []
        self.CURRENT_RECODING_TIME = 0

    def run(self):
        """
        Detect noise from microphone and record
        Noise is defined as sound surrounded by silence (according to threshold)
        """
        # Stores audio intensity of previous sound-chunks
        # Keep the last {OBSERVER_LENGTH} seconds in observer
        observer = deque(maxlen=self.OBSERVER_LENGTH * self.CHUNKS_PER_SEC)

        # Audio from before noise was detected
        # Keep the last {HISTORY_LENGTH} seconds in history
        history = deque(maxlen=self.HISTORY_LENGTH * self.CHUNKS_PER_SEC)

        # Snapshot of `history` taken when a recording starts. Using the live
        # history at save time would prepend the tail of the recording itself
        # instead of the audio that preceded it.
        pre_roll = []

        Util.log(self.name, "Listening...")
        try:
            while True:
                # Current chunk of audio data
                self.chunk = self.stream.read(self.CHUNK_SIZE, exception_on_overflow=False)

                # Add noise level of this chunk to the sliding-window
                observer.append(self.get_rms(self.chunk))

                # Recording is triggered when more than RECODING_OVER_THRESHOLD
                # chunks in the window are above the threshold
                loud_chunks = sum(level > self.threshold for level in observer)
                has_noise = loud_chunks > self.RECODING_OVER_THRESHOLD

                # detected() updates the lock as a side effect, so it is called
                # on every iteration, before the length limit is checked
                is_detected = self.detected(has_noise)

                if is_detected and self.LIMIT_RECODING > self.CURRENT_RECODING_TIME:
                    self.CURRENT_RECODING_TIME = self.CURRENT_RECODING_TIME + 1
                    if not self.recording():
                        # The current chunk is not in history yet, so this is
                        # exactly the audio preceding the recording
                        pre_roll = list(history)
                        self.start_recording()
                    self.record.append(self.chunk)

                    if not self.notified and len(self.record) > self.NOTIFICATION_LIMIT * self.CHUNKS_PER_SEC:
                        self.notify()
                elif self.recording():
                    # Silence or the recording limit was reached,
                    # finish recording and save
                    self.delete()
                    self.save(pre_roll + self.record)
                    self.stop_recording()
                    pre_roll = []
                    Util.log(self.name, "Listening...")

                history.append(self.chunk)
        except KeyboardInterrupt:
            Util.log(self.name, "Interrupted.")

    def get_chunk(self):
        """
        Return the current chunk of audio data
        @return bytes
        """
        return self.chunk

    def delete(self):
        """
        Delete old audio files from the archive.
        Files are visited in reverse name order; the first
        {REMAIN_RECORDING_FILES} audio files are left in place and every
        further audio file is removed. Hidden files and non-audio files are
        ignored.
        """
        Util.log(self.name, "Delete audio...")
        if not os.path.isdir(self.archive):
            # Nothing has been archived yet
            return

        count = 0
        for filename in sorted(os.listdir(self.archive), reverse=True):
            if filename.startswith('.'):
                continue
            kind = self.get_type(filename)
            if kind != "audio":
                continue
            count = count + 1
            if self.REMAIN_RECORDING_FILES < count:
                Util.log(self.name, "Delete audio filename=" + filename + ", type=" + kind + ", count=" + str(count))
                os.remove(os.path.join(self.archive, filename))

    def get_type(self, filename):
        """
        Classify a file by its extension (case-sensitive)
        @param string filename
        @return string 'video', 'audio' or 'photo' (the fallback)
        """
        kinds = {
            '.mp4': 'video',
            '.avi': 'video',
            '.wav': 'audio',
            '.mp3': 'audio',
        }
        extension = os.path.splitext(filename)[1]
        return kinds.get(extension, 'photo')

    def save(self, data):
        """
        Save mic data to a WAV file.
        @param list data
        """
        Util.log(self.name, "Saving audio...")

        # Flatten the list and build the file content before touching the disk
        wav = self.generate_wav(b''.join(data))
        if wav is None:
            # generate_wav() rejected the sample format; there is nothing to write
            Util.log(self.name, "Unsupported audio format, nothing saved")
            return

        # The archive directory may not exist on a fresh installation
        os.makedirs(self.archive, exist_ok=True)

        # Write converted data to file
        with open(self.current_file, "wb") as file:
            file.write(wav)

        # NOTE: convert_to_mp3() is intentionally not called here; the original
        # author disabled it because the converted audio quality was poor.

    def bytes_to_array(self, bytes, type):
        """
        Convert raw audio data to TypedArray
        The parameter names shadow builtins but are kept for compatibility
        with existing callers.
        @param bytes bytes
        @param dtype type
        @return numpy-Array
        """
        return np.frombuffer(bytes, dtype=type)

    def generate_wav(self, raw):
        """
        Create WAVE-file from raw audio chunks
        @param bytes raw
        @return bytes, or None if self.FORMAT is not supported
        """
        # Pick the sample type and WAVE format-tag (1 = PCM, 3 = FLOAT)
        # that match the stream format
        if self.FORMAT == pyaudio.paFloat32:
            dtype, format_tag = np.float32, 3
        elif self.FORMAT == pyaudio.paInt16:
            dtype, format_tag = np.int16, 1
        else:
            print("Unsupported format")
            return

        # Convert raw audio bytes to typed array
        samples = self.bytes_to_array(raw, dtype)

        # Get sample size
        sample_size = pyaudio.get_sample_size(self.FORMAT)

        # Get data-length
        byte_count = len(samples) * sample_size

        # Get bits/sample
        bits_per_sample = sample_size * 8

        # Calculate frame-size (bytes per sample across all channels)
        frame_size = self.CHANNELS * ((bits_per_sample + 7) // 8)

        # 44-byte RIFF/WAVE header, all fields little-endian
        header = struct.pack(
            '<4sI4s4sIHHIIHH4sI',
            b'RIFF',
            byte_count + 0x2c - 8,   # RIFF chunk size (file size minus 8)
            b'WAVE',
            b'fmt ',
            0x10,                    # Length of the format part (16 bytes)
            format_tag,
            self.CHANNELS,
            self.RATE,
            self.RATE * frame_size,  # Bytes/second
            frame_size,
            bits_per_sample,
            b'data',
            byte_count
        )

        # Serialize all samples at once as little-endian; packing them one by
        # one in Python is slow enough to stall the microphone loop
        payload = samples.astype(np.dtype(dtype).newbyteorder('<')).tobytes()
        return header + payload

    def convert_to_mp3(self, path):
        """
        Convert wav-file to mp3 and remove the wav-file on success
        @param string path
        """
        Util.log(self.name, "Converting audio...")
        try:
            # Argument list instead of a shell string, so a path containing
            # quotes or shell metacharacters cannot alter the command
            status = subprocess.call(
                ["lame", "--preset", "insane", path],
                stderr=subprocess.DEVNULL
            )
        except OSError:
            # lame is not installed or could not be started
            Util.log(self.name, "Error converting audio")
            return

        if status != 0:
            # Keep the wav-file when the conversion failed
            Util.log(self.name, "Error converting audio")
            return
        os.remove(path)

    def detected(self, has_noise):
        """
        Check if this or another detector detected something
        Sets the lock when there is noise and removes it otherwise, then reads
        the lock time back. NOTE(review): the lock is presumably shared with
        other detectors, which is why the result can differ from has_noise;
        confirm against Lock_Manager.
        @param boolean has_noise
        @return boolean
        """
        if has_noise:
            self.lock_manager.set()
        else:
            self.lock_manager.remove()
        self.detected_at = self.lock_manager.get_lock_time()
        return self.detected_at is not None

    def recording(self):
        """
        Check if currently recording
        @return boolean
        """
        return bool(self.record)

    def notify(self):
        """
        Notify
        Only logs and remembers that the notification for the current
        recording was sent.
        """
        Util.log(self.name, "Notifying")
        self.notified = True
if __name__ == "__main__":
    # Script entry point: constructing the detector opens the microphone and
    # calibrates the threshold; start() then launches the listening thread.
    detector = Noise_Detector()
    detector.start()