// Copyright © 2013 sss // // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License // as published by the Free Software Foundation; either version 2 // of the License, or (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. #ifndef WIN32 #include #else #include "wingetopt.h" #endif #include //c #include #include #include #include //c++ #include #include //portaudio #include #include //boost #include #include #include #include unsigned int precapture = 3, postcapture = 7, min_length = 1, thresold = 0, stop_thresold = 0; float thresold_percent = 5.0, stop_thresold_percent = 5.0; unsigned rate = 0; bool sound_detected = false, debug = false; FILE *file = NULL; boost::mutex lock; std::vector buffer; struct vorbis_params { ogg_stream_state os; ogg_page og; ogg_packet op; vorbis_info vi; vorbis_comment vc; vorbis_dsp_state vd; vorbis_block vb; }; void encode_start(vorbis_params &vparams) { vorbis_info_init(&(vparams.vi)); if(vorbis_encode_init_vbr(&(vparams.vi), 1, rate,.0)) printf("failed to init vorbis vbr"); if(vorbis_analysis_init(&(vparams.vd), &(vparams.vi))) printf("failed to init vorbis analysis"); vorbis_comment_init(&(vparams.vc)); vorbis_comment_add_tag(&(vparams.vc), "ENCODER","sound_detector"); vorbis_block_init(&(vparams.vd), &(vparams.vb)); srand(time(NULL)); ogg_stream_init(&(vparams.os),rand()); ogg_packet header, header_comm, header_code; vorbis_analysis_headerout(&(vparams.vd), &(vparams.vc), &header,&header_comm,&header_code); ogg_stream_packetin(&(vparams.os),&header); ogg_stream_packetin(&(vparams.os),&header_comm); ogg_stream_packetin(&(vparams.os),&header_code); int eos = 0; while(!eos) { int result = ogg_stream_flush(&(vparams.os),&(vparams.og)); if(result == 0) break; fwrite(vparams.og.header,1,vparams.og.header_len,file); fwrite(vparams.og.body,1,vparams.og.body_len,file); } } void encode_data(vorbis_params &vparams) { float **b = vorbis_analysis_buffer(&(vparams.vd), buffer.size()); for(size_t i = 0; i < buffer.size(); i++) b[0][i] = (float)buffer[i]*3.0517578125e-5f; vorbis_analysis_wrote(&(vparams.vd), buffer.size()); buffer.clear(); int eos = 0; while(vorbis_analysis_blockout(&(vparams.vd), &(vparams.vb)) == 1) { vorbis_analysis(&(vparams.vb), NULL); vorbis_bitrate_addblock(&(vparams.vb)); while(vorbis_bitrate_flushpacket(&(vparams.vd), &(vparams.op))) { ogg_stream_packetin(&(vparams.os),&(vparams.op)); while(!eos) { int result = ogg_stream_pageout(&(vparams.os), &(vparams.og)); if(result == 0) break; fwrite(vparams.og.header,1, vparams.og.header_len, file); fwrite(vparams.og.body,1, vparams.og.body_len, file); if(ogg_page_eos(&(vparams.og))) eos = 1; } } } } void encode_end(vorbis_params &vparams) { vorbis_analysis_wrote(&(vparams.vd),0); ogg_stream_clear(&(vparams.os)); vorbis_block_clear(&(vparams.vb)); vorbis_dsp_clear(&(vparams.vd)); vorbis_comment_clear(&(vparams.vc)); vorbis_info_clear(&(vparams.vi)); } std::string time_str() { boost::posix_time::ptime now = boost::posix_time::second_clock::local_time(); #ifndef WIN32 return boost::posix_time::to_simple_string(now); #else return boost::posix_time::to_iso_string(now); #endif } char out_dir[1024] = {0}; /* * It sounds like you want to do something (maybe start recording) if the sound level goes above a certain threshold. This is sometimes called a "gate". It also sounds like you are having trouble with false positives. This is sometimes handled with a "side-chain" applied to the gate. The general principle of a gate is create an envelope of your signal, and then monitor the envelope to discover when it goes above a certain threshold. If it is above the threshold, your gate is "on", if not, your gate is "off". If you treat your signal before creating the envelope in some way to make it more or less sensitive to various parts of your signal/noise the treatment is called a "side-chain". You will have to discover the details on your own because there is too much for a Q&A website, but maybe this is enough of a start: float[] buffer; //defined elsewhere float HOLD = .9999 ; //there are precise ways to compute this, but experimentation might work fine float THRESH = .7 ; //or whatever float env = 0; //we initialize to 0, but in real code be sure to save this between runs for(size_t i = 0; i < buffer.size(); i++) { // side-chain, if used, goes here float b = buffer[i]; // create envelope: float tmp = abs(b); // you could also do buffer[i] * buffer[i] env = env * HOLD + tmp * (1-HOLD); // threshold detection if( env > THRESH ) { //gate is "on" } else { //gate is "off" } } The side-chain might consist of filters like an eq. Here is a tutorial on designing audio eq: http://blog.bjornroche.com/2012/08/basic-audio-eqs.html * */ void handle_data() { vorbis_params vparams; while(true) { boost::this_thread::sleep(boost::posix_time::milliseconds(100)); if(!sound_detected) { if(buffer.size() > rate * precapture) { /* if(debug) printf("buffer contain %lu, required %d\n", buffer.size(), rate*precapture); */ unsigned long level = 0; lock.lock(); for(size_t i = 0; i < buffer.size(); i++) level += abs(buffer[i]); level /= buffer.size(); if(debug) printf("level detected %f, level required %f\n", ((float)level/(float)INT16_MAX)*100, thresold_percent); if(level > thresold) { printf("write started\n"); boost::filesystem::path p(out_dir); p += "/"; p += time_str(); p += ".ogg"; file = fopen(p.string().c_str(), "wb"); encode_start(vparams); encode_data(vparams); sound_detected = true; } else buffer.clear(); lock.unlock(); } } else { if(buffer.size() > rate * postcapture) { /* if(debug) printf("buffer contain %lu, required %d\n", buffer.size(), rate*postcapture); */ unsigned long level = 0; lock.lock(); for(size_t i = 0; i < buffer.size(); i++) level += abs(buffer[i]); level /= buffer.size(); encode_data(vparams); if(debug) printf("level detected %f, level lower than %f required to stop\n", ((float)level/(float)INT16_MAX)*100, stop_thresold_percent); lock.unlock(); if(level < stop_thresold) { if(debug) printf("write stopped\n"); fclose(file); encode_end(vparams); sound_detected = false; } } } } } int stream_callback(const void *input, void *output, unsigned long frameCount, const PaStreamCallbackTimeInfo *timeInfo, PaStreamCallbackFlags statusFlags, void *userData) { int16_t *buf = (int16_t*)input; lock.lock(); buffer.insert(buffer.end(), buf, buf + frameCount); lock.unlock(); //debug /* for(unsigned long i = 0; i < frameCount; i++) { if(buf[i] > (int)((float)INT16_MAX/100.0*(float)thresold_percent)) printf("%d ", buf[i]); } */ return paContinue; //debug end } double get_lowest_rate(const PaStreamParameters *inputParameters) { static double standardSampleRates[] = { 8000.0, 9600.0, 11025.0, 12000.0, 16000.0, 22050.0, 24000.0, 32000.0, 44100.0, 48000.0, 88200.0, 96000.0, 192000.0, -1 /* negative terminated list */ }; for(int i=0; standardSampleRates[i] > 0; i++) { if(Pa_IsFormatSupported(inputParameters, NULL, standardSampleRates[i]) == paFormatIsSupported) { rate = standardSampleRates[i]; return rate; } } return 0; } int main(int argc, char **argv) { std::string str = time_str(); PaError err = Pa_Initialize(); if(err != paNoError) { printf("PortAudio error: %s\n", Pa_GetErrorText(err)); return 1; } int opt = -1, device = -1; if(argc == 1) { #ifndef WIN32 printf("usage:\n%s -h -l -f -v -d -o -p -P -m -s -S \n\t-h\tthis help message\n\t-l\tdevice list\n\t-d\tdevice number from device list\n\t-o\toutput directory\n\t-f\tfork to bakground\n\t-v\tverbose\n\t-P\tpre capture seconds\n\t-p\tpost capture seconds\n\t-m\tminimum capture length\n\t-s\tsound level in float value 5,0%% default\n\t-S minimum sound level to stop recording in float value 5,0%% is default\n", argv[0]); #else printf("usage:\n%s -h -l -v -d -o -p -P -m -s -S \n\t-h\tthis help message\n\t-l\tdevice list\n\t-d\tdevice number from device list\n\t-o\toutput directory\n\t-v\tverbose\n\t-P\tpre capture seconds\n\t-p\tpost capture seconds\n\t-m\tminimum capture length\n\t-s\tsound level in float value 5,0%% default\n\t-S minimum sound level to stop recording in float value 5,0%% is default\n", argv[0]); #endif return 1; } bool _fork = false; while((opt = getopt(argc, argv, "hlfvp:P:m:s:S:d:o:")) != -1) { switch(opt) { break; case 'l': { for(PaDeviceIndex i =0, end = Pa_GetDeviceCount(); i < end; i++) { const PaDeviceInfo *info = Pa_GetDeviceInfo(i); printf("%d. %s, input channels %d, output channels %d, default sample rate %f, lowest input latency %f, highest input latency %f\n", i, info->name, info->maxInputChannels, info->maxOutputChannels, info->defaultSampleRate, info->defaultLowInputLatency, info->defaultHighInputLatency); } exit(0); } break; case 'v': debug = true; break; case 'P': precapture = atoi(optarg); break; case 'p': postcapture = atoi(optarg); break; case 'm': min_length = atoi(optarg); break; case 's': thresold_percent = strtof(optarg, NULL); break; case 'S': stop_thresold_percent = strtof(optarg, NULL); break; case 'o': strcpy(out_dir, optarg); break; case 'd': device = atoi(optarg); break; #ifdef WIN32 case 'f': _fork = true; break; #endif case 'h': default: #ifndef WIN32 printf("usage:\n%s -h -l -f -v -d -o -p -P -m -s -S \n\t-h\tthis help message\n\t-l\tdevice list\n\t-d\tdevice number from device list\n\t-o\toutput directory\n\t-f\tfork to bakground\n\t-v\tverbose\n\t-P\tpre capture seconds\n\t-p\tpost capture seconds\n\t-m\tminimum capture length\n\t-s\tsound level in float value 5,0%% default\n\t-S minimum sound level to stop recording in float value 5,0%% is default\n", argv[0]); #else printf("usage:\n%s -h -l -v -d -o -p -P -m -s -S \n\t-h\tthis help message\n\t-l\tdevice list\n\t-d\tdevice number from device list\n\t-o\toutput directory\n\t-v\tverbose\n\t-P\tpre capture seconds\n\t-p\tpost capture seconds\n\t-m\tminimum capture length\n\t-s\tsound level in float value 5,0%% default\n\t-S minimum sound level to stop recording in float value 5,0%% is default\n", argv[0]); #endif break; }; } if(device == -1) { printf("ERROR: device number is required\n"); return 1; } if(!out_dir[0]) { printf("ERROR: output directory is required\n"); return 1; } if(!boost::filesystem::exists(out_dir)) { printf("ERROR: output directory does not exists\n"); return 1; } if(!boost::filesystem::is_directory(out_dir)) { printf("ERROR: output directory is not directory %%) \n"); return 1; } thresold = (int)(((float)INT16_MAX/100.0)*thresold_percent); stop_thresold = (int)(((float)INT16_MAX/100.0)*stop_thresold_percent); PaStream *stream; PaStreamParameters params; memset(¶ms, 0, sizeof(PaStreamParameters)); params.channelCount = 1; params.sampleFormat = paInt16; params.device = device; const PaDeviceInfo *info = Pa_GetDeviceInfo(device); params.suggestedLatency = info->defaultHighInputLatency; if(debug) printf("choosen device latency %f, choosen rate %f\n", info->defaultHighInputLatency, get_lowest_rate(¶ms)); err = Pa_OpenStream(&stream, ¶ms, NULL, get_lowest_rate(¶ms), paFramesPerBufferUnspecified, paNoFlag, stream_callback, NULL); if(err != paNoError) printf("PortAudio error: %s\n", Pa_GetErrorText(err)); #ifndef WIN32 if(_fork) { pid_t pid = fork(); if(pid < 0) { std::cerr<<"Failed to fork\n"; exit(EXIT_FAILURE); } if(pid > 0) exit(EXIT_SUCCESS); } #endif //debug //file = fopen("./data_dump", "wb"); //end debug new boost::thread(boost::bind(handle_data)); Pa_StartStream(stream); while(true) boost::this_thread::sleep(boost::posix_time::seconds(1)); err = Pa_Terminate(); if(err != paNoError) { printf("PortAudio error: %s\n", Pa_GetErrorText(err)); return 1; } return 0; }