I’m working on an rtsp streaming(AAC format) client for iOS using ffmpeg. Right now

Question

0

Asked: May 26, 2026 (2026-05-26T21:47:04+00:00)

I’m working on an rtsp streaming(AAC format) client for iOS using ffmpeg. Right now

0

I’m working on an RTSP streaming (AAC format) client for iOS using FFmpeg. Right now the app works, but the streamed sound is very noisy and even a little distorted, far worse than when the same stream is played by VLC or MPlayer.

The stream is read by av_read_frame(), decoded by avcodec_decode_audio3(). Then I just send the decoded raw audio to Audio Queue.

When decoding a local AAC file with my app, the sound is not nearly as noisy. I know the initial encoding dramatically affects the result. However, I should at least be able to make it sound like other streaming clients…

Many parts of my implementation/modification actually came from trial and error. I believe I’m doing something wrong in setting up the Audio Queue and in the callback function that fills the audio buffers.

Any hints, suggestions or help are greatly appreciated.

// –info of test materials dumped by av_dump_format() —

Metadata:
    title           : /demo/test.3gp
  Duration: 00:00:30.11, start: 0.000000, bitrate: N/A
    Stream #0:0: Audio: aac, 32000 Hz, stereo, s16
aac  Advanced Audio Coding

// — the Audio Queue setup procedure —

/**
 * Creates the output Audio Queue on first use, primes its buffers and starts
 * playback. Does nothing when playback is already marked as running.
 *
 * On success playState.playing is set to YES and the audio session is
 * activated. If the queue cannot be created the method returns early with
 * playState.playing set to NO; if the queue cannot be started
 * playState.playing is likewise set to NO.
 */
- (void) startPlayback
{
    OSStatus err = 0;
    if(playState.playing) return;

    playState.started = false;

    if(!playState.queue)
    {
        UInt32 bufferSize;

        // Describe the decoded audio as canonical linear PCM: one frame per
        // packet, each frame holding one AudioSampleType per channel.
        playState.format.mSampleRate = _av->audio.sample_rate;
        playState.format.mFormatID = kAudioFormatLinearPCM;
        playState.format.mFormatFlags = kAudioFormatFlagsCanonical;
        playState.format.mChannelsPerFrame = _av->audio.channels_per_frame;
        playState.format.mBytesPerPacket = sizeof(AudioSampleType) * _av->audio.channels_per_frame;
        playState.format.mBytesPerFrame = sizeof(AudioSampleType) * _av->audio.channels_per_frame;
        playState.format.mBitsPerChannel = 8 * sizeof(AudioSampleType);
        playState.format.mFramesPerPacket = 1;
        playState.format.mReserved = 0;

        pauseStart = 0;
        DeriveBufferSize(playState.format, playState.format.mBytesPerPacket, BUFFER_DURATION, &bufferSize, &numPacketsToRead);
        err = AudioQueueNewOutput(&playState.format, aqCallback, &playState, NULL, kCFRunLoopCommonModes, 0, &playState.queue);

        if(err != 0)
        {
            printf("AQHandler.m startPlayback: Error creating new AudioQueue: %d \n", (int)err);

            // Without a queue neither buffer allocation nor AudioQueueStart
            // can succeed, so stop here and leave the state ready for a retry.
            playState.queue = NULL;
            playState.playing = NO;
            return;
        }

        // Allocate and pre-fill every buffer so the queue has data when it starts.
        for(int i = 0; i < NUM_BUFFERS; i++)
        {
            err = AudioQueueAllocateBufferWithPacketDescriptions(playState.queue, bufferSize, numPacketsToRead, &playState.buffers[i]);

            if(err != 0)
            {
                printf("AQHandler.m startPlayback: Error allocating buffer %d", i);
                // The buffer reference is not valid after a failed
                // allocation, so it must not be handed to fillAudioBuffer.
                continue;
            }

            fillAudioBuffer(&playState, playState.queue, playState.buffers[i]);
        }
    }

    startTime = mu_currentTimeInMicros();

    err = AudioQueueStart(playState.queue, NULL);

    if(err)
    {
        // Same scratch size as the FormatError call in fillAudioBuffer.
        // NOTE(review): FormatError is not visible here; confirm how many
        // bytes it writes into the buffer.
        char sErr[10];
        // OSStatus is a 32-bit value; cast it so it matches the %ld conversion.
        printf("AQHandler.m startPlayback: Could not start queue %ld %s.", (long)err, FormatError(sErr, err));

        playState.playing = NO;
    }
    else
    {
        AudioSessionSetActive(true);
        playState.playing = YES;
    }
}

// — callback for filling audio buffer —

// Running total of decoded bytes copied into queue buffers.
static int ct = 0;

/**
 * Fills one Audio Queue buffer with decoded audio pulled from getNextAudio()
 * and enqueues it when it holds any data.
 *
 * @param info   Pointer to the PlayState that owns the queue.
 * @param queue  The Audio Queue the buffer belongs to.
 * @param buffer The buffer to fill; its byte size and packet description
 *               count are reset before filling.
 *
 * Chunks are appended until numPacketsToRead descriptions have been written,
 * getNextAudio() returns no data, or the source reports that it is done. The
 * chunk delivered together with the "done" flag is still appended so the tail
 * of the stream is played. When the source is exhausted, ps->finished is set
 * and ps->started is cleared.
 */
static void fillAudioBuffer(void *info,AudioQueueRef queue, AudioQueueBufferRef buffer)
{
    PlayState *ps = (PlayState *)info;
    int lengthCopied = INT32_MAX;
    int dts = 0;
    int isDone = 0;
    OSStatus err = 0;

    // Zero-initialised so no indeterminate fields reach the queue if
    // AudioQueueGetCurrentTime fails.
    AudioTimeStamp bufferStartTime = {0};

    buffer->mAudioDataByteSize = 0;
    buffer->mPacketDescriptionCount = 0;

    AudioQueueGetCurrentTime(queue, NULL, &bufferStartTime, NULL);

    if (!ps->started)
        ps->started = true;

    while(buffer->mPacketDescriptionCount < numPacketsToRead && lengthCopied > 0)
    {
        // Append the next chunk directly behind the data already in the buffer.
        lengthCopied = getNextAudio(_av,
                        buffer->mAudioDataBytesCapacity - buffer->mAudioDataByteSize,
                        (uint8_t*)buffer->mAudioData + buffer->mAudioDataByteSize,
                        &dts, &isDone);

        if(lengthCopied > 0)
        {
            ct += lengthCopied;

            // Remember the first timestamp seen so later ones become relative.
            if(aqStartDts < 0) aqStartDts = dts;

            if(buffer->mPacketDescriptionCount == 0)
            {
                // NOTE(review): mSampleTime is expressed in sample frames, while
                // dts comes from the decoder; confirm both use the same unit
                // before relying on this explicit start time.
                bufferStartTime.mFlags = kAudioTimeStampSampleTimeValid;
                bufferStartTime.mSampleTime = (Float64)(dts - aqStartDts);//* _av->audio.frame_size;

                if (bufferStartTime.mSampleTime < 0)
                    bufferStartTime.mSampleTime = 0;

                // %p prints the full pointer; %x would truncate it on 64-bit.
                printf("AQHandler.m fillAudioBuffer: DTS for %p: %lf time base: %lf StartDTS: %d\n",
                        (void *)buffer,
                        bufferStartTime.mSampleTime,
                        _av->audio.time_base,
                        aqStartDts);
            }

            AudioStreamPacketDescription *desc = &buffer->mPacketDescriptions[buffer->mPacketDescriptionCount];
            desc->mStartOffset = buffer->mAudioDataByteSize;
            desc->mDataByteSize = lengthCopied;
            desc->mVariableFramesInPacket = 0;

            buffer->mPacketDescriptionCount++;
            buffer->mAudioDataByteSize += lengthCopied;
        }

        // Checked after the data has been accounted for, so the chunk that
        // arrives together with the "done" flag is not thrown away.
        if(lengthCopied < 0 || isDone)
        {
            printf("nothing to read....\n\n");
            ps->finished = true;
            ps->started = false;
            break;
        }
    }

    // The counts are not used here; the call itself is kept unchanged.
    int audioBufferCount, audioBufferTotal,  videoBufferCount, videoBufferTotal;
    bufferCheck(_av, &videoBufferCount, &videoBufferTotal, &audioBufferCount, &audioBufferTotal);

    if(buffer->mAudioDataByteSize)
    {
        err = AudioQueueEnqueueBufferWithParameters(queue, buffer, 0, NULL, 0, 0, 0, NULL, &bufferStartTime, NULL);

        if(err)
        {
            char sErr[10];
            printf("AQHandler.m fillAudioBuffer: Could not enqueue buffer %p: %d %s.", (void *)buffer, (int)err, FormatError(sErr, err));
        }
    }
}




/**
 * Copies the next decoded audio chunk out of the ring buffer.
 *
 * Waits, polling every 100 microseconds, while the ring is flagged as locked
 * or is empty (unless STATE_DIE is set). Then, if the chunk at the read
 * position fits into maxlength bytes, copies it to buf and advances the ring.
 *
 * @param vInst     Player instance whose context holds the audio ring.
 * @param maxlength Number of bytes available at buf.
 * @param buf       Destination for the audio data.
 * @param pts       Receives the chunk's pts, or 0 when nothing was copied.
 * @param isDone    Set to 1 when STATE_EOF is set and the ring is empty after
 *                  this call; otherwise left untouched.
 * @return Number of bytes copied, 0 when nothing was copied, or -1 when the
 *         ring is empty and STATE_EOF is set while waiting.
 *
 * NOTE(review): audio_ring.lock is a plain flag that is tested and then set
 * in separate steps; confirm that this is adequate for the threads involved.
 */
int getNextAudio(video_data_t* vInst, int maxlength, uint8_t* buf, int* pts, int* isDone) 
{
    struct video_context_t  *ctx = vInst->context;
    int    datalength            = 0;

    while(ctx->audio_ring.lock || (ctx->audio_ring.count <= 0 && ((ctx->play_state & STATE_DIE) != STATE_DIE)))
    {
        // Give up only when the stream has ended AND nothing is left to read.
        // If the ring is merely locked but still holds data, keep waiting so
        // the remaining audio is not dropped.
        if ((ctx->play_state & STATE_EOF) && ctx->audio_ring.count <= 0) return -1;
        usleep(100);
    }

    *pts = 0;
    ctx->audio_ring.lock = kLocked;

    // A chunk that exactly fills the remaining space still fits, hence >=.
    if(ctx->audio_ring.count > 0 && maxlength >= ctx->audio_buffer[ctx->audio_ring.read].size)
    {
        memcpy(buf, ctx->audio_buffer[ctx->audio_ring.read].data, ctx->audio_buffer[ctx->audio_ring.read].size);

        *pts = ctx->audio_buffer[ctx->audio_ring.read].pts;

        datalength = ctx->audio_buffer[ctx->audio_ring.read].size;

        // Advance the read position, wrapping at the end of the ring.
        ctx->audio_ring.read++;
        ctx->audio_ring.read %= ABUF_SIZE;
        ctx->audio_ring.count--;
    }

    // Evaluate the end-of-stream condition while the ring is still flagged as
    // locked, so the count is the one left by this read.
    if((ctx->play_state & STATE_EOF) == STATE_EOF && ctx->audio_ring.count == 0) *isDone = 1;

    ctx->audio_ring.lock = kUnlocked;

    return datalength;
}

Report

Leave an answer
Cancel reply

You must login to add an answer.

Need An Account,

1 Answer

Editorial Team · Answer 1 · 2026-05-26T21:47:05+00:00

Editorial Team

2026-05-26T21:47:05+00:00 · Added an answer on May 26, 2026 at 9:47 pm

The most likely reason for the distorted sound is simple packet loss, which RTSP can be susceptible to, especially over wireless connections.

I suggest you look into configuring ffmpeg to use TCP based connections when possible instead of the default RTP/UDP.

0

Reply
Share
Share

- Report

Sign Up

Sign In

Forgot Password

The Archive Base Latest Questions

I’m working on an rtsp streaming(AAC format) client for iOS using ffmpeg. Right now

Leave an answerCancel reply

1 Answer

Leave an answer
Cancel reply