// Copyright © 2013 sss // // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License // as published by the Free Software Foundation; either version 2 // of the License, or (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. //c++ #include #ifndef WIN32 #include #else //getopt #include "wingetopt.h" #endif //c #include #include #include //c++ #include #include #include #include //portaudio #include //vorbis #include //opus #include //boost #include #include #include #include //internal #include "iconv.h" unsigned int precapture = 3, postcapture = 7, min_length = 1, thresold = 0, stop_thresold = 0; float thresold_percent = 5.0, stop_thresold_percent = 5.0; unsigned rate = 0; bool sound_detected = false, debug = false; FILE *file = NULL; boost::mutex lock; std::vector buffer; struct detector_params { std::string out_dir; double sample_rate; }; detector_params sd_params; struct vorbis_params { ogg_stream_state os; ogg_page og; ogg_packet op; vorbis_info vi; vorbis_comment vc; vorbis_dsp_state vd; vorbis_block vb; }; void vorbis_encode_start(vorbis_params &vparams) { vorbis_info_init(&(vparams.vi)); if(vorbis_encode_init_vbr(&(vparams.vi), 1, rate,.0)) printf("failed to init vorbis vbr"); if(vorbis_analysis_init(&(vparams.vd), &(vparams.vi))) printf("failed to init vorbis analysis"); vorbis_comment_init(&(vparams.vc)); vorbis_comment_add_tag(&(vparams.vc), "ENCODER","sound_detector"); vorbis_block_init(&(vparams.vd), &(vparams.vb)); srand(time(NULL)); ogg_stream_init(&(vparams.os),rand()); ogg_packet header, header_comm, header_code; vorbis_analysis_headerout(&(vparams.vd), &(vparams.vc), &header,&header_comm,&header_code); ogg_stream_packetin(&(vparams.os),&header); ogg_stream_packetin(&(vparams.os),&header_comm); ogg_stream_packetin(&(vparams.os),&header_code); int eos = 0; while(!eos) { int result = ogg_stream_flush(&(vparams.os),&(vparams.og)); if(result == 0) break; fwrite(vparams.og.header,1,vparams.og.header_len,file); fwrite(vparams.og.body,1,vparams.og.body_len,file); } } void vorbis_encode_data(vorbis_params &vparams) { float **b = vorbis_analysis_buffer(&(vparams.vd), buffer.size()); for(size_t i = 0; i < buffer.size(); i++) b[0][i] = (float)buffer[i]*3.0517578125e-5f; vorbis_analysis_wrote(&(vparams.vd), buffer.size()); buffer.clear(); int eos = 0; while(vorbis_analysis_blockout(&(vparams.vd), &(vparams.vb)) == 1) { vorbis_analysis(&(vparams.vb), NULL); vorbis_bitrate_addblock(&(vparams.vb)); while(vorbis_bitrate_flushpacket(&(vparams.vd), &(vparams.op))) { ogg_stream_packetin(&(vparams.os),&(vparams.op)); while(!eos) { int result = ogg_stream_pageout(&(vparams.os), &(vparams.og)); if(result == 0) break; fwrite(vparams.og.header,1, vparams.og.header_len, file); fwrite(vparams.og.body,1, vparams.og.body_len, file); if(ogg_page_eos(&(vparams.og))) eos = 1; } } } } void vorbis_encode_end(vorbis_params &vparams) { vorbis_analysis_wrote(&(vparams.vd),0); ogg_stream_clear(&(vparams.os)); vorbis_block_clear(&(vparams.vb)); vorbis_dsp_clear(&(vparams.vd)); vorbis_comment_clear(&(vparams.vc)); vorbis_info_clear(&(vparams.vi)); } struct opus_params { OpusEncoder *encoder; }; void opus_encode_start(opus_params &op) { if(op.encoder) free(op.encoder); int encoder_error = 0; op.encoder = opus_encoder_create(sd_params.sample_rate, 1, OPUS_APPLICATION_AUDIO, &encoder_error); opus_encoder_ctl(op.encoder, OPUS_SET_BITRATE(24000)); opus_encoder_ctl(op.encoder, OPUS_SET_COMPLEXITY(10)); opus_encoder_ctl(op.encoder, OPUS_SET_SIGNAL(OPUS_SIGNAL_MUSIC)); } std::string time_str() { boost::posix_time::ptime now = boost::posix_time::second_clock::local_time(); #ifndef WIN32 return boost::posix_time::to_simple_string(now); #else return boost::posix_time::to_iso_string(now); #endif } void handle_data() { vorbis_params vparams; while(true) { boost::this_thread::sleep(boost::posix_time::milliseconds(100)); if(!sound_detected) { if(buffer.size() > rate * precapture) { /* if(debug) printf("buffer contain %lu, required %d\n", buffer.size(), rate*precapture); */ unsigned long level = 0; lock.lock(); for(size_t i = 0; i < buffer.size(); i++) level += abs(buffer[i]); level /= buffer.size(); if(debug) printf("level detected %f, level required %f\n", ((float)level/(float)INT16_MAX)*100, thresold_percent); if(level > thresold) { if(debug) printf("write started\n"); boost::filesystem::path p(sd_params.out_dir); p += "/"; p += time_str(); p += ".ogg"; file = fopen(p.string().c_str(), "wb"); vorbis_encode_start(vparams); vorbis_encode_data(vparams); sound_detected = true; } else buffer.clear(); lock.unlock(); } } else { if(buffer.size() > rate * postcapture) { /* if(debug) printf("buffer contain %lu, required %d\n", buffer.size(), rate*postcapture); */ unsigned long level = 0; lock.lock(); for(size_t i = 0; i < buffer.size(); i++) level += abs(buffer[i]); level /= buffer.size(); vorbis_encode_data(vparams); if(debug) printf("level detected %f, level lower than %f required to stop\n", ((float)level/(float)INT16_MAX)*100, stop_thresold_percent); lock.unlock(); if(level < stop_thresold) { if(debug) printf("write stopped\n"); fclose(file); vorbis_encode_end(vparams); sound_detected = false; } } } } } int stream_callback(const void *input, void *output, unsigned long frameCount, const PaStreamCallbackTimeInfo *timeInfo, PaStreamCallbackFlags statusFlags, void *userData) { int16_t **bufs = (int16_t**)input; int16_t *buf = bufs[0]; lock.lock(); buffer.insert(buffer.end(), buf, buf + frameCount); //debug /* for(size_t i = 0; i < buffer.size(); i++) { if(buffer[i] > (int)((float)INT16_MAX/100.0*(float)thresold_percent)) printf("%d ", buffer[i]); } */ lock.unlock(); return paContinue; //debug end } double get_lowest_rate(const PaStreamParameters *inputParameters) { static double standardSampleRates[] = { 8000.0, 9600.0, 11025.0, 12000.0, 16000.0, 22050.0, 24000.0, 32000.0, 44100.0, 48000.0, 88200.0, 96000.0, 192000.0, -1 /* negative terminated list */ }; for(int i=0; standardSampleRates[i] > 0; i++) { if(Pa_IsFormatSupported(inputParameters, NULL, standardSampleRates[i]) == paFormatIsSupported) { rate = standardSampleRates[i]; return rate; } } return 0; } int main(int argc, char **argv) { std::string str = time_str(); PaError err = Pa_Initialize(); if(err != paNoError) { printf("PortAudio error: %s\n", Pa_GetErrorText(err)); return 1; } int opt = -1, device = -1; if(argc == 1) { #ifndef WIN32 printf("usage:\n%s -h -l -f -v -d -o -p -P -m -s -S \n\t-h\tthis help message\n\t-l\tdevice list\n\t-d\tdevice number from device list\n\t-o\toutput directory\n\t-f\tfork to bakground\n\t-v\tverbose\n\t-P\tpre capture seconds\n\t-p\tpost capture seconds\n\t-m\tminimum capture length\n\t-s\tsound level in float value 5,0%% default\n\t-S\tminimum sound level to stop recording in float value 5,0%% is default\n", argv[0]); #else printf("usage:\n%s -h -l -v -d -o -p -P -m -s -S \n\t-h\tthis help message\n\t-l\tdevice list\n\t-d\tdevice number from device list\n\t-o\toutput directory\n\t-v\tverbose\n\t-P\tpre capture seconds\n\t-p\tpost capture seconds\n\t-m\tminimum capture length\n\t-s\tsound level in float value 5,0%% default\n\t-S\tminimum sound level to stop recording in float value 5,0%% is default\n", argv[0]); #endif return 1; } bool _fork = false; while((opt = getopt(argc, argv, "hlfvp:P:m:s:S:d:o:")) != -1) { switch(opt) { break; case 'l': { for(PaDeviceIndex i =0, end = Pa_GetDeviceCount(); i < end; i++) { const PaDeviceInfo *info = Pa_GetDeviceInfo(i); //windows #ifdef WIN32 char *name = change_charset(info->name); printf("%d. %s, input channels %d, output channels %d, default sample rate %f, lowest input latency %f, highest input latency %f\n", i, name, info->maxInputChannels, info->maxOutputChannels, info->defaultSampleRate, info->defaultLowInputLatency, info->defaultHighInputLatency); free(name); #else printf("%d. %s, input channels %d, output channels %d, default sample rate %f, lowest input latency %f, highest input latency %f\n", i, info->name, info->maxInputChannels, info->maxOutputChannels, info->defaultSampleRate, info->defaultLowInputLatency, info->defaultHighInputLatency); #endif } exit(0); } break; case 'v': debug = true; break; case 'P': precapture = atoi(optarg); break; case 'p': postcapture = atoi(optarg); break; case 'm': min_length = atoi(optarg); break; case 's': thresold_percent = strtof(optarg, NULL); break; case 'S': stop_thresold_percent = strtof(optarg, NULL); break; case 'o': sd_params.out_dir = optarg; break; case 'd': device = atoi(optarg); break; #ifndef WIN32 case 'f': _fork = true; break; #endif case 'h': default: #ifndef WIN32 printf("usage:\n%s -h -l -f -v -d -o -p -P -m -s -S \n\t-h\tthis help message\n\t-l\tdevice list\n\t-d\tdevice number from device list\n\t-o\toutput directory\n\t-f\tfork to bakground\n\t-v\tverbose\n\t-P\tpre capture seconds\n\t-p\tpost capture seconds\n\t-m\tminimum capture length\n\t-s\tsound level in float value 5,0%% default\n\t-S\tminimum sound level to stop recording in float value 5,0%% is default\n", argv[0]); #else printf("usage:\n%s -h -l -v -d -o -p -P -m -s -S \n\t-h\tthis help message\n\t-l\tdevice list\n\t-d\tdevice number from device list\n\t-o\toutput directory\n\t-v\tverbose\n\t-P\tpre capture seconds\n\t-p\tpost capture seconds\n\t-m\tminimum capture length\n\t-s\tsound level in float value 5,0%% default\n\t-S\tminimum sound level to stop recording in float value 5,0%% is default\n", argv[0]); #endif break; }; } if(device == -1) { printf("ERROR: device number is required\n"); return 1; } if(sd_params.out_dir.empty()) { printf("ERROR: output directory is required\n"); return 1; } if(!boost::filesystem::exists(sd_params.out_dir)) { printf("ERROR: output directory does not exists\n"); return 1; } if(!boost::filesystem::is_directory(sd_params.out_dir)) { printf("ERROR: output directory is not directory %%) \n"); return 1; } thresold = (int)(((float)INT16_MAX/100.0)*thresold_percent); stop_thresold = (int)(((float)INT16_MAX/100.0)*stop_thresold_percent); PaStream *stream; PaStreamParameters pa_params; memset(&pa_params, 0, sizeof(PaStreamParameters)); pa_params.channelCount = 1; pa_params.sampleFormat = paInt16 | paNonInterleaved; pa_params.device = device; const PaDeviceInfo *info = Pa_GetDeviceInfo(device); pa_params.suggestedLatency = info->defaultHighInputLatency; sd_params.sample_rate = get_lowest_rate(&pa_params); if(debug) printf("choosen device latency %f, choosen rate %f\n", info->defaultHighInputLatency, sd_params.sample_rate); err = Pa_OpenStream(&stream, &pa_params, NULL, sd_params.sample_rate, paFramesPerBufferUnspecified, paClipOff, stream_callback, NULL); if(err != paNoError) printf("PortAudio error: %s\n", Pa_GetErrorText(err)); #ifndef WIN32 if(_fork) { pid_t pid = fork(); if(pid < 0) { std::cerr<<"Failed to fork\n"; exit(EXIT_FAILURE); } if(pid > 0) exit(EXIT_SUCCESS); } #endif //debug //file = fopen("./data_dump", "wb"); //end debug new boost::thread(boost::bind(handle_data)); Pa_StartStream(stream); while(true) boost::this_thread::sleep(boost::posix_time::seconds(1)); err = Pa_Terminate(); if(err != paNoError) { printf("PortAudio error: %s\n", Pa_GetErrorText(err)); return 1; } return 0; }