AnthonyJbs 0 Newbie Poster

Hello,

I have created a pretty simple voice chat with PyAudio, but the voice quality is poor. You usually hear some noise, like in old movies. It is probably caused by missing chunks of voice data, which I send over UDP. Is it possible to somehow reduce the noise?
Furthermore, I want to play a sound when the user moves over a button, but for some reason it seems impossible to merge those two tracks!

This is the most important class, "Sound". It runs in a thread so it can run in an endless loop.

import numpy as np
import pyaudio
import wave      # I play the buttons effects from wav file

class Sound:
    """Mix microphone input, incoming network voice chunks, and local wav
    sound effects, and play the result through a single PyAudio stream.

    Designed to be driven from a dedicated thread: either call run()
    (endless loop) or call myCallback() repeatedly yourself.
    """

    WIDTH = 2        # sample width in bytes (16-bit PCM)
    CHANNELS = 2
    RATE = 44100
    FRAMES = 1024                # frames processed per callback cycle
    SAMPLES = FRAMES * CHANNELS  # interleaved int16 samples per chunk (2048)

    def __init__(self, parent=None):
        # BUG FIX: the original called super().__init__(parent), which
        # raises TypeError because the base class is plain `object`.
        # `parent` is kept for backward compatibility (this class was
        # presumably once a Qt/QThread subclass — TODO confirm).
        super().__init__()
        self.voiceStreams = []   # queued np.int16 arrays of network voice
        self.effectStreams = []  # open wave.Wave_read objects still playing

        self.vVolume = 1         # voice volume multiplier
        self.eVolume = 0.5       # effect volume multiplier

        self.voip = None         # UDP sender (set externally); sendDatagram(bytes)

        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(
            format=self.p.get_format_from_width(Sound.WIDTH),
            channels=Sound.CHANNELS,
            rate=Sound.RATE,
            input=True,
            output=True,
        )

        # Raw PCM byte chunks; empty bytes is the "nothing queued" sentinel.
        self.nextSample = b""
        self.lastSample = b""

        self.stream.start_stream()

    def addVoice(self, b):
        """Queue a chunk of raw 16-bit PCM voice data (bytes) for mixing."""
        # np.frombuffer replaces the removed np.fromstring. The resulting
        # array is read-only, which is fine: it is only read during mixing.
        self.voiceStreams.append(np.frombuffer(b, np.int16))

    def addEffect(self, name):
        """Start playing the wav file at path `name` as a sound effect."""
        self.effectStreams.append(wave.open(name, "rb"))

    def newVVolume(self, v):
        """Set the voice volume multiplier."""
        self.vVolume = v

    def newEVolume(self, v):
        """Set the effect volume multiplier."""
        self.eVolume = v

    def run(self):
        """Process audio forever; intended to be the target of a thread."""
        while True:
            self.myCallback()

    def myCallback(self):
        """One audio cycle: play the prepared chunk, capture the mic and
        send it over UDP, then mix queued voices + effects into the next
        chunk."""
        # --- playback -------------------------------------------------
        if self.nextSample:
            self.stream.write(self.nextSample)
            self.lastSample = self.nextSample
        elif self.lastSample:
            # No fresh data (UDP chunk lost or late): replay the previous
            # chunk once so listeners hear less of a gap.
            self.stream.write(self.lastSample)
            self.lastSample = b""

        # --- capture --------------------------------------------------
        # Only read when a full chunk is available so we never block here.
        if self.stream.get_read_available() >= Sound.FRAMES:
            mic = self.stream.read(Sound.FRAMES)
        else:
            mic = b""

        if mic and self.voip:
            self.voip.sendDatagram(mic)  # ship the chunk to the UDP server

        # --- mixing ---------------------------------------------------
        # BUG FIX: accumulate in float64. The original int64 buffer made
        # `data += <float array>` raise a casting error, because scaling
        # by the volume factors produces fractional samples.
        data = np.zeros(Sound.SAMPLES, np.float64)

        # Mix queued network voices, attenuated by the number of
        # simultaneous voices to avoid clipping.
        voices, self.voiceStreams = self.voiceStreams, []
        if voices:
            gain = self.vVolume * 0.4 / len(voices)
            for s in voices:
                data += s * gain

        # Mix the active sound effects.
        finished = []
        n_effects = len(self.effectStreams)
        for i, effect in enumerate(self.effectStreams):
            frames = effect.readframes(Sound.FRAMES)
            # BUG FIX: readframes returns bytes, so the original
            # `frames == ""` was always False and finished effects were
            # never removed. Test truthiness instead.
            if not frames:
                finished.append(i)
                continue
            d = np.frombuffer(frames, np.int16)
            # A shorter-than-chunk tail is dropped because the mix buffer
            # has a fixed length (padding it with zeros would be nicer,
            # but this matches the original behaviour).
            if len(d) >= Sound.SAMPLES:
                # BUG FIX: the original wrote `d / length * length`,
                # which cancels out; divide by the effect count once, as
                # the voice mixing above does.
                data += d[:Sound.SAMPLES] * (self.eVolume * 0.3 / n_effects)

        # Delete finished effects highest-index-first so the remaining
        # indexes stay valid, and close the underlying wav readers.
        for i in reversed(finished):
            self.effectStreams[i].close()
            del self.effectStreams[i]

        if np.any(data):
            # Convert the mix back to interleaved 16-bit PCM bytes
            # (tobytes replaces the deprecated tostring).
            self.nextSample = data.astype(np.int16).tobytes()
        else:
            self.nextSample = b""

    def close(self):
        """Stop and release all audio resources."""
        # BUG FIX: the original unconditionally called self.timer.stop(),
        # but no `timer` attribute is ever created in this class; guard
        # so close() works regardless of whether a caller attached one.
        timer = getattr(self, "timer", None)
        if timer is not None:
            timer.stop()
        self.stream.stop_stream()
        self.stream.close()
        for effect in self.effectStreams:
            effect.close()
        self.p.terminate()

The UDP server and client are pretty simple (and they work well, so I don't post them here). The client just sends all data to the server, and the server sends all data to all clients whenever it receives any. I don't tell anybody who sent the data. That means if the data are delivered too late, I will play both chunks from one client at the same time (because I treat them as if they came from multiple clients)!

Here are the wav files: Click Here
I did not create them; I downloaded them from http://www.freesound.org/people/ERH/sounds/31135/ and they are licensed under Attribution!

!! I have also added "OUTPUT.txt" to the Dropbox folder, which shows what Python prints out when running this example between two people (I get voice data from only one user).

Thank you for any advice.