SecondLife: llaudio/vorbisencode.cpp Source File

00001 
00032 #include "linden_common.h"
00033 
00034 #include "vorbisencode.h"
00035 #include "vorbis/vorbisenc.h"
00036 #include "llerror.h"
00037 #include "llrand.h"
00038 #include "llmath.h"
00039 #include "llapr.h"
00040 
00041 //#if LL_DARWIN   (original platform guard; replaced by the unconditional '#if 0' below)
00042 // MBW -- XXX -- Getting rid of SecondLifeVorbis for now -- no fmod means no name collisions.
00043 #if 0
00044 #include "VorbisFramework.h"
00045 // Everything up to the matching #endif is compiled out. When enabled, each macro renames a libvorbis/libogg entry point used in this file to a mac_-prefixed symbol (presumably declared by VorbisFramework.h -- confirm before re-enabling).
00046 #define vorbis_analysis                         mac_vorbis_analysis
00047 #define vorbis_analysis_headerout       mac_vorbis_analysis_headerout
00048 #define vorbis_analysis_init            mac_vorbis_analysis_init
00049 #define vorbis_encode_ctl                       mac_vorbis_encode_ctl
00050 #define vorbis_encode_setup_init        mac_vorbis_encode_setup_init
00051 #define vorbis_encode_setup_managed     mac_vorbis_encode_setup_managed
00052 // vorbis_info / vorbis_comment / vorbis_block / dsp-state lifecycle calls and the analysis buffer calls.
00053 #define vorbis_info_init                        mac_vorbis_info_init
00054 #define vorbis_info_clear                       mac_vorbis_info_clear
00055 #define vorbis_comment_init                     mac_vorbis_comment_init
00056 #define vorbis_comment_clear            mac_vorbis_comment_clear
00057 #define vorbis_block_init                       mac_vorbis_block_init
00058 #define vorbis_block_clear                      mac_vorbis_block_clear
00059 #define vorbis_dsp_clear                        mac_vorbis_dsp_clear
00060 #define vorbis_analysis_buffer          mac_vorbis_analysis_buffer
00061 #define vorbis_analysis_wrote           mac_vorbis_analysis_wrote
00062 #define vorbis_analysis_blockout        mac_vorbis_analysis_blockout
00063 // Ogg stream / page calls.
00064 #define ogg_stream_packetin                     mac_ogg_stream_packetin
00065 #define ogg_stream_init                         mac_ogg_stream_init
00066 #define ogg_stream_flush                        mac_ogg_stream_flush
00067 #define ogg_stream_pageout                      mac_ogg_stream_pageout
00068 #define ogg_page_eos                            mac_ogg_page_eos
00069 #define ogg_stream_clear                        mac_ogg_stream_clear
00070 // NOTE(review): vorbis_encode_init_vbr, vorbis_bitrate_addblock and vorbis_bitrate_flushpacket are called in this file but have no remap here.
00071 #endif
00072 
00073 S32 check_for_invalid_wav_formats(const char *in_fname, char *error_msg)
00074 {
00075         U16 num_channels = 0;
00076         U32 sample_rate = 0;
00077         U32 bits_per_sample = 0;
00078         U32 physical_file_size = 0;
00079         U32 chunk_length = 0;
00080         U32 raw_data_length = 0;
00081         U32 bytes_per_sec = 0;
00082         BOOL uncompressed_pcm = FALSE;
00083 
00084         unsigned char wav_header[44];           /*Flawfinder: ignore*/
00085 
00086         error_msg[0] = '\0';
00087 
00088     apr_file_t* infp = ll_apr_file_open(in_fname,LL_APR_RB);
00089         if (!infp)
00090         {
00091                 strcpy(error_msg, "CannotUploadSoundFile");     /*Flawfinder: ignore*/
00092                 return(LLVORBISENC_SOURCE_OPEN_ERR);
00093         }
00094 
00095         ll_apr_file_read(infp, wav_header, 44);
00096         physical_file_size = ll_apr_file_seek(infp,APR_END,0);
00097 
00098         if (strncmp((char *)&(wav_header[0]),"RIFF",4))
00099         {
00100                 strcpy(error_msg, "SoundFileNotRIFF");  /*Flawfinder: ignore*/
00101                 apr_file_close(infp);
00102             return(LLVORBISENC_WAV_FORMAT_ERR);
00103         }
00104 
00105         if (strncmp((char *)&(wav_header[8]),"WAVE",4))
00106         {
00107                 strcpy(error_msg, "SoundFileNotRIFF");  /*Flawfinder: ignore*/
00108                 apr_file_close(infp);
00109             return(LLVORBISENC_WAV_FORMAT_ERR);
00110         }
00111         
00112         // parse the chunks
00113         
00114         U32 file_pos = 12;  // start at the first chunk (usually fmt but not always)
00115         
00116         while ((file_pos + 8)< physical_file_size)
00117         {
00118                 ll_apr_file_seek(infp,APR_SET,file_pos);
00119                 ll_apr_file_read(infp, wav_header, 44);
00120 
00121                 chunk_length = ((U32) wav_header[7] << 24) 
00122                         + ((U32) wav_header[6] << 16) 
00123                         + ((U32) wav_header[5] << 8) 
00124                         + wav_header[4];
00125 
00126 //              llinfos << "chunk found: '" << wav_header[0] << wav_header[1] << wav_header[2] << wav_header[3] << "'" << llendl;
00127 
00128                 if (!(strncmp((char *)&(wav_header[0]),"fmt ",4)))
00129                 {
00130                         if ((wav_header[8] == 0x01) && (wav_header[9] == 0x00))
00131                         {
00132                                 uncompressed_pcm = TRUE;
00133                         }
00134                         num_channels = ((U16) wav_header[11] << 8) + wav_header[10];
00135                         sample_rate = ((U32) wav_header[15] << 24) 
00136                                 + ((U32) wav_header[14] << 16) 
00137                                 + ((U32) wav_header[13] << 8) 
00138                                 + wav_header[12];
00139                         bits_per_sample = ((U16) wav_header[23] << 8) + wav_header[22];
00140                         bytes_per_sec = ((U32) wav_header[19] << 24) 
00141                                 + ((U32) wav_header[18] << 16) 
00142                                 + ((U32) wav_header[17] << 8) 
00143                                 + wav_header[16];
00144                 }
00145                 else if (!(strncmp((char *)&(wav_header[0]),"data",4)))
00146                 {
00147                         raw_data_length = chunk_length;                 
00148                 }
00149                 file_pos += (chunk_length + 8);
00150                 chunk_length = 0;
00151         } 
00152 
00153         apr_file_close(infp);   
00154 
00155         if (!uncompressed_pcm)
00156         {       
00157                  strcpy(error_msg, "SoundFileNotPCM");          /*Flawfinder: ignore*/
00158                   return(LLVORBISENC_PCM_FORMAT_ERR);
00159         }
00160         
00161         if ((num_channels < 1) || (num_channels > 2))
00162         {       
00163                 strcpy(error_msg, "SoundFileInvalidChannelCount");      /*Flawfinder: ignore*/
00164                 return(LLVORBISENC_MULTICHANNEL_ERR);
00165         }
00166 
00167         if (sample_rate != 44100)
00168         {       
00169                 strcpy(error_msg, "SoundFileInvalidSampleRate");                /*Flawfinder: ignore*/
00170                 return(LLVORBISENC_UNSUPPORTED_SAMPLE_RATE);
00171         }
00172         
00173         if ((bits_per_sample != 16) && (bits_per_sample != 8))
00174         {                
00175                 strcpy(error_msg, "SoundFileInvalidWordSize");          /*Flawfinder: ignore*/
00176                 return(LLVORBISENC_UNSUPPORTED_WORD_SIZE);
00177         }
00178 
00179         if (!raw_data_length)
00180         {
00181                 strcpy(error_msg, "SoundFileInvalidHeader");                    /*Flawfinder: ignore*/
00182                 return(LLVORBISENC_CLIP_TOO_LONG);               
00183         }
00184 
00185         F32 clip_length = (F32)raw_data_length/(F32)bytes_per_sec;
00186                 
00187         if (clip_length > 10.0f)
00188         {
00189                 strcpy(error_msg, "SoundFileInvalidTooLong");                   /*Flawfinder: ignore*/
00190                 return(LLVORBISENC_CLIP_TOO_LONG);               
00191         }
00192 
00193     return(LLVORBISENC_NOERR);
00194 }
00195 
00196 S32 encode_vorbis_file(const char *in_fname, const char *out_fname)
00197 {
00198 #define READ_BUFFER 1024
00199         unsigned char readbuffer[READ_BUFFER*4+44];   /* out of the data segment, not the stack */      /*Flawfinder: ignore*/
00200 
00201         ogg_stream_state os; /* take physical pages, weld into a logical stream of packets */
00202         ogg_page         og; /* one Ogg bitstream page.  Vorbis packets are inside */
00203         ogg_packet       op; /* one raw packet of data for decode */
00204         
00205         vorbis_info      vi; /* struct that stores all the static vorbis bitstream settings */
00206         vorbis_comment   vc; /* struct that stores all the user comments */
00207         
00208         vorbis_dsp_state vd; /* central working state for the packet->PCM decoder */
00209         vorbis_block     vb; /* local working space for packet->PCM decode */
00210         
00211         int eos=0;
00212         int result;
00213 
00214         U16 num_channels = 0;
00215         U32 sample_rate = 0;
00216         U32 bits_per_sample = 0;
00217 
00218         S32 format_error = 0;
00219         char error_msg[MAX_STRING];     /*Flawfinder: ignore*/
00220         if ((format_error = check_for_invalid_wav_formats(in_fname, error_msg)))
00221         {
00222                 llwarns << error_msg << ": " << in_fname << llendl;
00223                 return(format_error);
00224         }
00225 
00226 #if 1
00227         unsigned char wav_header[44];   /*Flawfinder: ignore*/
00228 
00229         S32 data_left = 0;
00230 
00231         apr_file_t* infp = ll_apr_file_open(in_fname,LL_APR_RB);
00232         if (!infp)
00233         {
00234                 llwarns << "Couldn't open temporary ogg file for writing: " << in_fname
00235                         << llendl;
00236                 return(LLVORBISENC_SOURCE_OPEN_ERR);
00237         }
00238         apr_file_t* outfp = ll_apr_file_open(out_fname,LL_APR_WPB);
00239         if (!outfp)
00240         {
00241                 llwarns << "Couldn't open upload sound file for reading: " << in_fname
00242                         << llendl;
00243                 apr_file_close (infp);
00244                 return(LLVORBISENC_DEST_OPEN_ERR);
00245         }
00246         
00247          // parse the chunks
00248          U32 chunk_length = 0;
00249          U32 file_pos = 12;  // start at the first chunk (usually fmt but not always)
00250          
00251          while (apr_file_eof(infp) != APR_EOF)
00252          {
00253                  ll_apr_file_seek(infp,APR_SET,file_pos);
00254                  ll_apr_file_read(infp, wav_header, 44);
00255                  
00256                  chunk_length = ((U32) wav_header[7] << 24) 
00257                          + ((U32) wav_header[6] << 16) 
00258                          + ((U32) wav_header[5] << 8) 
00259                          + wav_header[4];
00260                  
00261 //               llinfos << "chunk found: '" << wav_header[0] << wav_header[1] << wav_header[2] << wav_header[3] << "'" << llendl;
00262                  
00263                  if (!(strncmp((char *)&(wav_header[0]),"fmt ",4)))
00264                  {
00265                          num_channels = ((U16) wav_header[11] << 8) + wav_header[10];
00266                          sample_rate = ((U32) wav_header[15] << 24) 
00267                                  + ((U32) wav_header[14] << 16) 
00268                                  + ((U32) wav_header[13] << 8) 
00269                                  + wav_header[12];
00270                          bits_per_sample = ((U16) wav_header[23] << 8) + wav_header[22];
00271                  }
00272                  else if (!(strncmp((char *)&(wav_header[0]),"data",4)))
00273                  {
00274                          ll_apr_file_seek(infp,APR_SET,file_pos+8);
00275                          // leave the file pointer at the beginning of the data chunk data
00276                          data_left = chunk_length;                      
00277                          break;
00278                  }
00279                  file_pos += (chunk_length + 8);
00280                  chunk_length = 0;
00281          } 
00282          
00283 //       apr_file_close(infp);  
00284 
00285          /********** Encode setup ************/
00286          
00287          /* choose an encoding mode */
00288          /* (mode 0: 44kHz stereo uncoupled, roughly 128kbps VBR) */
00289          vorbis_info_init(&vi);
00290 
00291          // always encode to mono
00292 
00293          // SL-52913 & SL-53779 determined this quality level to be our 'good
00294          // enough' general-purpose quality level with a nice low bitrate.
00295          // Equivalent to oggenc -q0.5
00296          F32 quality = 0.05f;
00297 //       quality = (bitrate==128000 ? 0.4f : 0.1);
00298 
00299 //       if (vorbis_encode_init(&vi, /* num_channels */ 1 ,sample_rate, -1, bitrate, -1))
00300          if (vorbis_encode_init_vbr(&vi, /* num_channels */ 1 ,sample_rate, quality))
00301 //       if (vorbis_encode_setup_managed(&vi,1,sample_rate,-1,bitrate,-1) ||
00302 //              vorbis_encode_ctl(&vi,OV_ECTL_RATEMANAGE_AVG,NULL) ||
00303 //              vorbis_encode_setup_init(&vi))
00304         {
00305                 llwarns << "unable to initialize vorbis codec at quality " << quality << llendl;
00306                 //              llwarns << "unable to initialize vorbis codec at bitrate " << bitrate << llendl;
00307                 return(LLVORBISENC_DEST_OPEN_ERR);
00308         }
00309          
00310          /* add a comment */
00311          vorbis_comment_init(&vc);
00312 //       vorbis_comment_add(&vc,"Linden");
00313          
00314          /* set up the analysis state and auxiliary encoding storage */
00315          vorbis_analysis_init(&vd,&vi);
00316          vorbis_block_init(&vd,&vb);
00317          
00318          /* set up our packet->stream encoder */
00319          /* pick a random serial number; that way we can more likely build
00320                 chained streams just by concatenation */
00321          ogg_stream_init(&os, ll_rand());
00322          
00323          /* Vorbis streams begin with three headers; the initial header (with
00324                 most of the codec setup parameters) which is mandated by the Ogg
00325                 bitstream spec.  The second header holds any comment fields.  The
00326                 third header holds the bitstream codebook.  We merely need to
00327                 make the headers, then pass them to libvorbis one at a time;
00328                 libvorbis handles the additional Ogg bitstream constraints */
00329          
00330          {
00331                  ogg_packet header;
00332                  ogg_packet header_comm;
00333                  ogg_packet header_code;
00334                  
00335                  vorbis_analysis_headerout(&vd,&vc,&header,&header_comm,&header_code);
00336                  ogg_stream_packetin(&os,&header); /* automatically placed in its own
00337                                                                                           page */
00338                  ogg_stream_packetin(&os,&header_comm);
00339                  ogg_stream_packetin(&os,&header_code);
00340                  
00341                  /* We don't have to write out here, but doing so makes streaming 
00342                   * much easier, so we do, flushing ALL pages. This ensures the actual
00343                   * audio data will start on a new page
00344                   */
00345                  while(!eos){
00346                          int result=ogg_stream_flush(&os,&og);
00347                          if(result==0)break;
00348                          ll_apr_file_write(outfp, og.header, og.header_len);
00349                          ll_apr_file_write(outfp, og.body, og.body_len);
00350                  }
00351                  
00352          }
00353          
00354          
00355          while(!eos)
00356          {
00357                  long bytes_per_sample = bits_per_sample/8;
00358 
00359                  long bytes=(long)ll_apr_file_read(infp, readbuffer,llclamp((S32)(READ_BUFFER*num_channels*bytes_per_sample),0,data_left)); /* stereo hardwired here */
00360                  
00361                  if (bytes==0)
00362                  {
00363                          /* end of file.  this can be done implicitly in the mainline,
00364                                 but it's easier to see here in non-clever fashion.
00365                                 Tell the library we're at end of stream so that it can handle
00366                                 the last frame and mark end of stream in the output properly */
00367 
00368                          vorbis_analysis_wrote(&vd,0);
00369 //                       eos = 1;
00370                          
00371                  }
00372                  else
00373                  {
00374                          long i;
00375                          long samples;
00376                          int temp;
00377 
00378                          data_left -= bytes;
00379              /* data to encode */
00380                          
00381                          /* expose the buffer to submit data */
00382                          float **buffer=vorbis_analysis_buffer(&vd,READ_BUFFER);
00383                         
00384                          i = 0;
00385                          samples = bytes / (num_channels * bytes_per_sample);
00386 
00387                          if (num_channels == 2)
00388                          {
00389                                  if (bytes_per_sample == 2)
00390                                  {
00391                                          /* uninterleave samples */
00392                                          for(i=0; i<samples ;i++)
00393                                          {
00394                                                  temp =  ((signed char *)readbuffer)[i*4+1];    /*Flawfinder: ignore*/
00395                                                  temp += ((signed char *)readbuffer)[i*4+3];    /*Flawfinder: ignore*/
00396                                                  temp <<= 8;
00397                                                  temp += readbuffer[i*4];
00398                                                  temp += readbuffer[i*4+2];
00399 
00400                                                  buffer[0][i] = ((float)temp) / 65536.f;
00401                                          }
00402                                  }
00403                                  else // presume it's 1 byte per which is unsigned (F#@%ing wav "standard")
00404                                  {
00405                                          /* uninterleave samples */
00406                                          for(i=0; i<samples ;i++)
00407                                          {
00408                                                  temp  = readbuffer[i*2+0];
00409                                                  temp += readbuffer[i*2+1];
00410                                                  temp -= 256;
00411                                                  buffer[0][i] = ((float)temp) / 256.f;
00412                                          }
00413                                  } 
00414                          }
00415                          else if (num_channels == 1)
00416                          {
00417                                  if (bytes_per_sample == 2)
00418                                  {
00419                                          for(i=0; i < samples ;i++)
00420                                          {
00421                                                  temp = ((signed char*)readbuffer)[i*2+1];
00422                                                  temp <<= 8;
00423                                                  temp += readbuffer[i*2];
00424                                                  buffer[0][i] = ((float)temp) / 32768.f;
00425                                          }
00426                                  }
00427                                  else // presume it's 1 byte per which is unsigned (F#@%ing wav "standard")
00428                                  {
00429                                          for(i=0; i < samples ;i++)
00430                                          {
00431                                                  temp = readbuffer[i];
00432                                                  temp -= 128;
00433                                                  buffer[0][i] = ((float)temp) / 128.f;
00434                                          }
00435                                  }
00436                          }
00437                                 
00438                          /* tell the library how much we actually submitted */
00439                          vorbis_analysis_wrote(&vd,i);
00440                  }
00441                          
00442                  /* vorbis does some data preanalysis, then divvies up blocks for
00443                         more involved (potentially parallel) processing.  Get a single
00444                         block for encoding now */
00445                  while(vorbis_analysis_blockout(&vd,&vb)==1)
00446                  {
00447                          
00448                          /* analysis */
00449                         /* Do the main analysis, creating a packet */
00450                         vorbis_analysis(&vb, NULL);
00451                         vorbis_bitrate_addblock(&vb);
00452 
00453                         while(vorbis_bitrate_flushpacket(&vd, &op)) 
00454                         {
00455                          
00456                          /* weld the packet into the bitstream */
00457                          ogg_stream_packetin(&os,&op);
00458                          
00459                          /* write out pages (if any) */
00460                          while(!eos)
00461                          {
00462                                  result = ogg_stream_pageout(&os,&og);
00463 
00464                                  if(result==0)
00465                                         break;
00466 
00467                                  ll_apr_file_write(outfp, og.header, og.header_len);
00468                                  ll_apr_file_write(outfp, og.body, og.body_len);
00469                                  
00470                                  /* this could be set above, but for illustrative purposes, I do
00471                                         it here (to show that vorbis does know where the stream ends) */
00472                                  
00473                                  if(ogg_page_eos(&og))
00474                                         eos=1;
00475                                  
00476                          }
00477                         }
00478                  }
00479          }
00480          
00481          
00482          
00483          /* clean up and exit.  vorbis_info_clear() must be called last */
00484          
00485          ogg_stream_clear(&os);
00486          vorbis_block_clear(&vb);
00487          vorbis_dsp_clear(&vd);
00488          vorbis_comment_clear(&vc);
00489          vorbis_info_clear(&vi);
00490          
00491          /* ogg_page and ogg_packet structs always point to storage in
00492                 libvorbis.  They're never freed or manipulated directly */
00493          
00494 //       fprintf(stderr,"Vorbis encoding: Done.\n");
00495          llinfos << "Vorbis encoding: Done." << llendl;
00496          apr_file_close(outfp);
00497          apr_file_close(infp);
00498          
00499 #endif
00500          return(LLVORBISENC_NOERR);
00501          
00502 }