-
Meet the Music Understanding framework
Discover Music Understanding, a new framework that lets your app analyze audio on device across six dimensions: key, rhythm, structure, pace, instrument activity, and loudness. Then use the Music Understanding Lab sample app to visualize each result.
Chapters
- 0:00 - Introduction
- 1:39 - Musical features
- 3:19 - Framework integration
- 3:55 - Music Understanding Lab
Resources
-
Search this video…
-
-
4:47 - Initialize the session
import MusicUnderstanding

// View-modifier fragment: presents a file picker limited to audio files and
// analyzes the file the user picks.
.fileImporter(isPresented: $isPresented, allowedContentTypes: [.audio]) { result in
    switch result {
    case .success(let url):
        // The completion closure is synchronous and non-throwing, so the
        // async, throwing session work runs inside a Task with its own
        // error handling.
        Task {
            do {
                let asset = AVURLAsset(
                    url: url,
                    options: [AVURLAssetPreferPreciseDurationAndTimingKey : true]
                )
                let session = try await MusicUnderstandingSession(asset: asset)
                let results = try await session.analyze()
                // Use `results` here (for example, store them in view state).
            } catch {
                // Placeholder: replace with the app's own error presentation.
                print("Music analysis failed: \(error)")
            }
        }
    case .failure(let error):
        // Placeholder: replace with the app's own error presentation.
        print("File import failed: \(error)")
    }
}
-
5:24 - Inside SessionResult
import MusicUnderstanding

/// Container for the results of a session's analysis, with one optional
/// property per analysis type.
///
/// NOTE(review): a property is presumably `nil` when that analysis was not
/// requested or produced no result — confirm against the framework docs.
public struct SessionResult: Codable, Sendable {
    /// Instrument activity result, if available.
    public let instrumentActivity: InstrumentActivityResult?
    /// Key result, if available.
    public let key: KeyResult?
    /// Loudness result, if available.
    public let loudness: LoudnessResult?
    /// Pace result, if available.
    public let pace: PaceResult?
    /// Rhythm result, if available.
    public let rhythm: RhythmResult?
    /// Structure result, if available.
    public let structure: StructureResult?
}
-
5:53 - TimedValue
import MusicUnderstanding

/// A value paired with a single point in time.
public struct TimedValue<Value>: Codable, Equatable, Sendable where Value: Codable & Equatable & Sendable {
    /// The time associated with `value`.
    public let time: CMTime
    /// The value at `time`.
    public let value: Value
}
-
5:58 - RangedValue
import MusicUnderstanding

/// A value paired with a time range.
public struct RangedValue<Value>: Codable, Equatable, Sendable where Value: Codable & Equatable & Sendable {
    /// The time range associated with `value`.
    public let range: CMTimeRange
    /// The value for `range`.
    public let value: Value
}
-
6:27 - Key analysis
/// Result of key analysis.
public struct KeyResult: Codable, Sendable {
    /// Key signatures, each paired with a time range.
    public let ranges: [MusicUnderstandingSession.RangedValue<KeySignature>]
}
-
6:43 - KeySignature
/// A key signature, made up of a tonic and a mode.
public struct KeySignature: Codable, Hashable, Sendable {
    /// The tonic of the key.
    public let tonic: Tonic
    /// The mode of the key (`major` or `minor`).
    public let mode: Mode
}
-
6:48 - Using tonic
/// The tonic of a key signature, backed by a `String` raw value.
///
/// Sharp and flat spellings are separate cases (for example `aFlat` and
/// `gSharp`).
@frozen public enum Tonic: String, Codable, Hashable, Sendable {
    case aFlat, aSharp, a, bFlat, b, c, cSharp, d, dFlat, dSharp, eFlat, e, f, fSharp, g, gFlat, gSharp
}
-
6:59 - Using mode
/// The mode of a key signature, backed by a `String` raw value.
public enum Mode: String, Codable, Hashable, Sendable {
    case major, minor
}
-
7:16 - Rhythm analysis
import MusicUnderstanding

/// Result of rhythm analysis.
public struct RhythmResult: Codable, Sendable {
    /// Times of the detected beats.
    public let beats: [CMTime]
    /// Times of the detected bars.
    /// NOTE(review): presumably the start time of each bar — confirm.
    public let bars: [CMTime]
    /// Beats per minute, if available.
    public let beatsPerMinute: Float?
}
-
8:42 - StructureResult
import MusicUnderstanding

/// Result of structure analysis, expressed as three lists of time ranges.
///
/// NOTE(review): how sections, segments, and phrases relate to each other is
/// not shown here — confirm against the framework docs.
public struct StructureResult: Codable, Sendable {
    /// Time ranges of sections.
    public let sections: [CMTimeRange]
    /// Time ranges of segments.
    public let segments: [CMTimeRange]
    /// Time ranges of phrases.
    public let phrases: [CMTimeRange]
}
-
9:26 - Analyzing pace
import MusicUnderstanding

/// Result of pace analysis.
public struct PaceResult: Codable, Sendable {
    /// Pace values, each paired with a time range.
    /// NOTE(review): the unit of the `Double` value is not shown here — confirm.
    public let ranges: [MusicUnderstandingSession.RangedValue<Double>]
}
-
10:13 - InstrumentActivityResult
import MusicUnderstanding

/// Result of instrument activity analysis, keyed by instrument.
public struct InstrumentActivityResult: Codable, Sendable {
    /// Time ranges for each instrument.
    /// NOTE(review): presumably the ranges in which the instrument is active — confirm.
    public let ranges: [Instrument: [CMTimeRange]]
    /// Timed activity values for each instrument.
    /// NOTE(review): the scale of the `Float` value is not shown here — confirm.
    public let activity: [Instrument: [MusicUnderstandingSession.TimedValue<Float>]]
}
-
11:45 - LoudnessResult
import MusicUnderstanding

/// Result of loudness analysis.
///
/// `integrated` and `peak` are single timed values; `momentary` and
/// `shortTerm` are series of timed values.
/// NOTE(review): units and measurement windows are not shown here — confirm.
public struct LoudnessResult: Codable, Sendable {
    /// Integrated loudness, as a single timed value.
    public let integrated: MusicUnderstandingSession.TimedValue<Float>
    /// Momentary loudness values over time.
    public let momentary: [MusicUnderstandingSession.TimedValue<Float>]
    /// Short-term loudness values over time.
    public let shortTerm: [MusicUnderstandingSession.TimedValue<Float>]
    /// Peak value, as a single timed value.
    public let peak: MusicUnderstandingSession.TimedValue<Float>
}
-
12:48 - Streaming API for loudness
import MusicUnderstanding

/// An asynchronous, `Sendable` sequence of `LoudnessResult` values whose
/// iteration can throw.
public var loudnessResults: some AsyncSequence<LoudnessResult, any Error> & Sendable
-
12:55 - Streaming API for loudness
import MusicUnderstanding

let audioProvider = AudioProvider()
let session = MusicUnderstandingSession(audioProvider: audioProvider)

// Run the loudness consumer and the analysis concurrently in one task group.
await withThrowingTaskGroup(of: Void.self) { group in
    // Consumer: forward loudness results as they are produced.
    group.addTask {
        for try await result in await session.loudnessResults {
            // `LoudnessResult.momentary` is declared as an array of timed
            // values, so forward each sample's value in order.
            for sample in result.momentary {
                updateAudioLevel(sample.value)
            }
        }
    }
    // Producer: run only the loudness analysis.
    group.addTask {
        try await session.analyze(for: [.loudness])
    }
}
-
13:19 - Audio Provider
import MusicUnderstanding

/// Template for a custom audio source: an async sequence that is its own
/// iterator and yields audio buffers.
struct AudioProvider: AsyncSequence, AsyncIteratorProtocol {
    /// Returns this value as the iterator.
    func makeAsyncIterator() -> Self {
        return self
    }

    /// Supplies the next audio buffer; `nil` ends the sequence.
    mutating func next() async -> AVReadOnlyAudioPCMBuffer? {
        // Return the next audio buffer, or nil to signal completion
    }
}
-
13:55 - Encode to JSON
import MusicUnderstanding

let session = try await MusicUnderstandingSession(asset: asset)
let results = try await session.analyze()

// Encode the analysis results to JSON and keep the encoded data instead of
// discarding the return value of `encode`.
let encoder = JSONEncoder()
let jsonData = try encoder.encode(results)
-
14:47 - Suggestion for using pace
// NOTE(review): `paceValue` is defined outside this snippet — presumably a
// value taken from `PaceResult.ranges`; confirm. Guard against a zero
// `paceValue` before dividing.
let timePerClip = 60 / paceValue
-
-
- 0:00 - Introduction
Discover how the Music Understanding framework brings on-device offline audio analysis to all Apple platforms.
- 1:39 - Musical features
Explore the six areas of the framework's music analysis: key, rhythm, structure, pace, instrument activity, and loudness.
- 3:19 - Framework integration
Learn how to initialize a MusicUnderstandingSession and begin analysis with an AVAsset or custom audio provider.
- 3:55 - Music Understanding Lab
Walk through a sample app that visualizes all analysis types from the framework.