티스토리 뷰

 

 

 

실시간 음성 스트리밍 구현중 직면한 문제입니다.

 

 

์šฐ์„  ์„œ๋ฒ„๋ž‘ ํด๋ผ์ด์–ธํŠธ๊ฐ„ 

16kHz, Int16, 30ms ๋‹จ์œ„๋กœ buffer๋ฅผ ์ฃผ๊ณ ๋ฐ›์œผ๋ ค๊ณ  ํ•˜์˜€์Šต๋‹ˆ๋‹ค.

 

 

/// Sample rate of the streamed audio, in Hz.
let sampleRate: Double = 16000
/// Duration of one streamed chunk, in seconds (0.03 s = 30 ms).
let frameDuration: Double = 0.03 // 30 ms
/// Number of sample frames in one chunk: `sampleRate * frameDuration` (16000 * 0.03 = 480).
lazy var audioFrameCount = Int(sampleRate * frameDuration)

 

 

 

스트리밍 보낼때 규격에 맞춰서 리샘플링한 데이터를 전송해주었는데

 

// After conversion, send the entire converted buffer in a single request.
let pcmBuf = AVAudioPCMBuffer(pcmFormat: mixerFormat, frameCapacity: frameCount)!
var conversionError: NSError?
var didProvideInput = false
let status = audioInputConverter?.convert(to: pcmBuf, error: &conversionError) { _, outStatus in
    // Hand `buffer` to the converter exactly once. Reporting `.haveData` with the
    // same buffer on every call would let the converter consume the same audio again.
    guard !didProvideInput else {
        outStatus.pointee = .noDataNow
        return nil
    }
    didProvideInput = true
    outStatus.pointee = .haveData
    return buffer
}

// Skip sending when the converter reported a failure instead of shipping stale samples.
if status != .error, conversionError == nil,
   let channelData = pcmBuf.int16ChannelData?.pointee {
    // Copy the bytes straight out of the PCM buffer while it is alive. Building an
    // UnsafeBufferPointer from a Swift array via implicit conversion yields a pointer
    // that is only valid for the duration of that initializer call.
    let byteCount = Int(pcmBuf.frameLength) * MemoryLayout<Int16>.size
    let data = Data(bytes: channelData, count: byteCount)
    sendVoiceDataRelay.send(StreamRequest(..., data: data, ...))
}

 

 

๋ฐ›๋Š”์ชฝ์—์„œ ์Œ์„ฑ buffer๊ฐ€ ์—„์ฒญ ๋Š๊ฒจ์„œ ๋“ค๋ฆฌ๋Š” ๋ฌธ์ œ๊ฐ€ ์žˆ์—ˆ์Šต๋‹ˆ๋‹ค

 

 

 

 

ํ™•์ธํ•ด๋ณด๋‹ˆ

iOS์—์„œ ๋งˆ์ดํฌ ์ž…๋ ฅ์„ ๋ฐ›์„ ๋•Œ,

audio engine์˜ ์ž…๋ ฅ์ฒ˜๋ฆฌ ๋ถ€๋ถ„์—์„œ 100ms ๊ฐ„๊ฒฉ์œผ๋กœ streaming ์ „์†ก๋˜๊ณ  ์žˆ์—ˆ์Šต๋‹ˆ๋‹ค.

 

 

inputNode?.installTap(onBus: 0, bufferSize: tapFrames, format: audioInputFormat) {
                [weak self] (buffer, when) in
                // Logging here showed this callback firing at roughly 100 ms intervals.
}

 

 

audioFrameCount๋ฅผ 480(16000 * 0.03)๋กœ ์„ค์ •ํ•ด๋’€๋Š”๋ฐ๋„ 30ms๋งˆ๋‹ค ๋ณด๋‚ด์•ผํ•˜๋Š”๋ฐ

100ms ๊ฐ„๊ฒฉ์œผ๋กœ ์ „์†กํ•˜์—ฌ ๋ฐ›๋Š”์ชฝ์—์„œ ์Œ์„ฑ์ด ๋Š๊ฒจ์„œ ๋“ค๋ ธ์—ˆ์Šต๋‹ˆ๋‹ค.

 

์™œ ๊ทœ๊ฒฉ๋Œ€๋กœ ๋งž์ถฐ์ค€ ์˜ˆ์ƒ ์ „์†ก๊ฐ„๊ฒฉ 30ms๊ฐ€ ์•„๋‹Œ 100ms์ธ์ง€ ํ™•์ธํ•ด๋ณด๋‹ˆ,

audio engine์˜ ์ž…๋ ฅ ์ฒ˜๋ฆฌ ์ตœ์†Œ ๊ฐ„๊ฒฉ์ด 100ms ์ •๋„์˜€๊ณ  ์ด๊ฑธ ๋” ๋‚ฎ์ถ”๋Š”๊ฒŒ ๋ถˆ๊ฐ€๋Šฅํ–ˆ์Šต๋‹ˆ๋‹ค.

 

 

 

이를 해결하기 위해서

 

1. installTap 내부에서 100ms 반환되는 데이터를 큐에 담아주고 (*버퍼 큐)

 

2. 30ms 만큼 버퍼 큐에서 꺼내서 반환하였습니다. (*청크 분할)

 

3. ๊ทธ๋ฆฌ๊ณ  30ms ์˜ค๋””์˜ค ์‚ฌ์ด์ฆˆ (480 frame)๋งŒํผ ์ž˜๋ฆฐ ๋ฐ์ดํ„ฐ๋ฅผ 30ms ์‹œ๊ฐ„ ๊ฐ„๊ฒฉ์œผ๋กœ streaming ์ „์†ก ํ•ด์ฃผ์—ˆ์Šต๋‹ˆ๋‹ค. (*pacer)

-> installTap์—์„œ๋Š” ์ตœ์†Œ ๊ฐ„๊ฒฉ์ด 100ms ์ด๋ฏ€๋กœ, 30m *3๊ฐœ 10ms * 1๊ฐœ ์ด๋ ‡๊ฒŒ ์ „์†ก์ด ์ด๋ฃจ์–ด์ง€๊ฒŒ ๋ฉ๋‹ˆ๋‹ค.

-> 30ms ์‹œ๊ฐ„๊ฐ„๊ฒฉ์œผ๋กœ streaming ํ•ด์ฃผ๊ธฐ ์œ„ํ•ด pacer๊ฐ€ ํ•„์š”

 

 

이렇게 3가지 개념을 (queue, chunk, pacer) 적용하여 해결하였습니다

 

 

 

조금 디테일하게 살펴보면

/// Serial queue on which `txAccum` is appended to and on which the pacer timer runs.
private let txQueue = DispatchQueue(label: "com.readysay.txPacer")
/// Accumulated Int16 samples waiting to be cut into packets and sent.
private var txAccum = [Int16]()
    

 inputNode?.installTap(onBus: 0, bufferSize: tapFrames, format: audioInputFormat) {
                [weak self] (buffer, when) in
            // `self` is captured weakly, so unwrap it once before touching any members.
            guard let self = self else { return }

            // Resample with AVAudioConverter: 48 kHz → 16 kHz.
            // ... omitted (produces `chPtr` and `outFrames`)

            // Copy the samples out of the raw pointer now, inside the tap callback.
            // NOTE(review): `chPtr` presumably points into the converter's output buffer,
            // which may be reused or released once this callback returns, so it must not
            // be dereferenced later from the async block.
            let samples = Array(UnsafeBufferPointer(start: chPtr, count: outFrames))

            // Then append the copied samples to the buffer queue on the pacer queue.
            self.txQueue.async { [weak self] in
                self?.txAccum += samples
            }
 }

 

 

installTap์˜ ๋ฐ˜ํ™˜ ๋ฐ์ดํ„ฐ์ธ ๋ฆฌ์ƒ˜ํ”Œ๋ง ๋ฐ์ดํ„ฐ๋ฅผ txAccum ๋ฒ„ํผ ํ์— ๊ณ„์† ๋ˆ„์ ํ•ด์ฃผ์—ˆ์Šต๋‹ˆ๋‹ค.

 

이후

 

let framesPerPacket = 480 // 30ms @16k
...
// Copy the oldest `framesPerPacket` samples into a packet, then drop them from the accumulator.
// `removeFirst(_:)` requires at least `framesPerPacket` elements, so the count must be checked first.
let pkt = Array(self.txAccum.prefix(framesPerPacket))
self.txAccum.removeFirst(framesPerPacket)

 

 

480 frame ๋งŒํผ ์ž˜๋ผ๋‚ด์–ด data๋กœ ๋งŒ๋“ค์–ด์„œ ๋„˜๊ฒจ์ฃผ๋ฉฐ

480 frame์ด ์•ˆ๋ ๊ฒฝ์šฐ ๋ฒ„ํผ ํ์— ๋‚จ๊ฒจ์„œ ๋‹ค์Œ tick๋•Œ ๋„˜๊ฒจ์ง€๋„๋ก ํ•ด๋‘์—ˆ์Šต๋‹ˆ๋‹ค.

 

 

// Not enough samples for a full packet yet: leave them queued for a later tick.
guard self.txAccum.count >= framesPerPacket else { return }

 

 

 

아래는 디테일 한 코드입니다

 

/// Starts the transmit pacer: a 30 ms repeating timer on `txQueue` that cuts one
/// 480-frame packet from `txAccum` per tick and publishes it as a `StreamRequest`.
///
/// Does nothing if a pacer timer is already installed (`txTimer != nil`).
///
/// - Parameter displayName: Display name attached to every outgoing request.
private func startTxPacer(displayName: String) {
    guard txTimer == nil else { return }

    let framesPerPacket = 480            // 30 ms @ 16 kHz
    let minStart = framesPerPacket * 2   // start only after ~60 ms is queued (avoids initial dropouts)
    var started = false

    let timer = DispatchSource.makeTimerSource(queue: txQueue)
    timer.schedule(deadline: .now() + .milliseconds(30), repeating: .milliseconds(30), leeway: .milliseconds(3))
    timer.setEventHandler { [weak self] in
        guard let self = self else { return }

        // Pre-buffer: stay silent until the initial backlog has been collected.
        if !started {
            guard self.txAccum.count >= minStart else { return }
            started = true
        }
        // Fewer than one packet queued: keep the remainder for a later tick.
        guard self.txAccum.count >= framesPerPacket else { return }

        // NOTE(review): exactly one packet is sent per tick, so any tick that fires late
        // or is skipped leaves extra samples in `txAccum` that are never caught up on.
        // Consider bounding the backlog if latency growth is observed.

        // Build one 30 ms packet from the oldest queued samples.
        let pkt = Array(self.txAccum.prefix(framesPerPacket))
        self.txAccum.removeFirst(framesPerPacket)

        // Copy the Int16 samples as raw bytes (host byte order) without rebinding
        // the array's memory to another type.
        let data = pkt.withUnsafeBufferPointer { Data(buffer: $0) }

        self.txSeq &+= 1
//        print("⏱️ TX tick → #\(self.txSeq) bytes=\(data.count)")

        // Publish the packet to the streaming pipeline.
        let sourceLang = LoginUserHashCache.checkSourceTranslateLnCodeHashCache()
        let targetLang = LoginUserHashCache.checkTargetTranslateLnCodeHashCache()
        let req = StreamRequest(userId: self.userId,
                                channelId: self.channel.id,
                                data: data,
                                sourceLang: sourceLang,
                                translateLang: targetLang,
                                displayName: displayName)
        inputRelays.sendVoiceDataRelay.accept(req)
    }
    txTimer = timer
    timer.resume()
}

 

 

그래서 100ms 마다 반환받은 오디오 데이터를

30ms 사이즈(480 frame) 만큼 청크를 분할하고

30ms 주기로 스트리밍에 담아 보내주도록 해주었습니다

 

 

이렇게 청크 분할을 적용하여

100ms 단위 입력 → 30ms 단위 출력으로 수정해주었습니다.

 

30ms,30ms,30ms,10ms ํ•œ๋ฒˆ์— ์ „์†ก๋˜์ง€์•Š๊ณ 

์ผ์ •ํ•˜๊ฒŒ 30ms ์ฃผ๊ธฐ๋กœ ์ „์†ก๋˜๋„๋ก pacer๋ฅผ ์ ์šฉํ•˜์—ฌ ๋ฌธ์ œ๋ฅผ ํ•ด๊ฒฐํ•ด์ฃผ์—ˆ์Šต๋‹ˆ๋‹ค

 

 

 

댓글