From f7905eee87ca4828e57a5c7bfbfd8cd2bb7d82d7 Mon Sep 17 00:00:00 2001 From: UltralyticsAssistant Date: Sun, 11 Aug 2024 20:23:47 +0000 Subject: [PATCH] Auto-format by https://ultralytics.com/actions --- YOLO/AppDelegate.swift | 80 +- YOLO/Utilities/BoundingBoxView.swift | 153 +-- YOLO/Utilities/HumanModel.swift | 291 ++--- YOLO/Utilities/PostProcessing.swift | 152 +-- YOLO/Utilities/SaveResults.swift | 75 +- YOLO/Utilities/ThresholdProvider.swift | 44 +- YOLO/Utilities/TrackingModel.swift | 206 +-- YOLO/VideoCapture.swift | 259 ++-- YOLO/ViewController.swift | 1653 ++++++++++++------------ 9 files changed, 1486 insertions(+), 1427 deletions(-) diff --git a/YOLO/AppDelegate.swift b/YOLO/AppDelegate.swift index 25a926f..dfd8998 100644 --- a/YOLO/AppDelegate.swift +++ b/YOLO/AppDelegate.swift @@ -17,57 +17,61 @@ import UIKit /// The main application delegate, handling global app behavior and configuration. @UIApplicationMain class AppDelegate: UIResponder, UIApplicationDelegate { - var window: UIWindow? + var window: UIWindow? - /// Called when the app finishes launching, used here to set global app settings. - func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) -> Bool { - // Disable screen dimming and auto-lock to keep the app active during long operations. - UIApplication.shared.isIdleTimerDisabled = true + /// Called when the app finishes launching, used here to set global app settings. + func application( + _ application: UIApplication, + didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]? + ) -> Bool { + // Disable screen dimming and auto-lock to keep the app active during long operations. + UIApplication.shared.isIdleTimerDisabled = true - // Enable battery monitoring to allow the app to adapt its behavior based on battery level. - UIDevice.current.isBatteryMonitoringEnabled = true + // Enable battery monitoring to allow the app to adapt its behavior based on battery level. + UIDevice.current.isBatteryMonitoringEnabled = true - // Store the app version and build version in UserDefaults for easy access elsewhere in the app. - if let appVersion = Bundle.main.infoDictionary?["CFBundleShortVersionString"] as? String, - let buildVersion = Bundle.main.infoDictionary?["CFBundleVersion"] as? String { - UserDefaults.standard.set("\(appVersion) (\(buildVersion))", forKey: "app_version") - } + // Store the app version and build version in UserDefaults for easy access elsewhere in the app. + if let appVersion = Bundle.main.infoDictionary?["CFBundleShortVersionString"] as? String, + let buildVersion = Bundle.main.infoDictionary?["CFBundleVersion"] as? String + { + UserDefaults.standard.set("\(appVersion) (\(buildVersion))", forKey: "app_version") + } - // Store the device's UUID in UserDefaults for identification purposes. - if let uuid = UIDevice.current.identifierForVendor?.uuidString { - UserDefaults.standard.set(uuid, forKey: "uuid") - } + // Store the device's UUID in UserDefaults for identification purposes. + if let uuid = UIDevice.current.identifierForVendor?.uuidString { + UserDefaults.standard.set(uuid, forKey: "uuid") + } - // Ensure UserDefaults changes are immediately saved. - UserDefaults.standard.synchronize() + // Ensure UserDefaults changes are immediately saved. 
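// Editor's aside (not part of the original patch): the values stored above can be read
// back anywhere in the app. A minimal sketch using the same UserDefaults keys:
//
//   let version = UserDefaults.standard.string(forKey: "app_version") ?? "unknown"
//   let deviceID = UserDefaults.standard.string(forKey: "uuid") ?? "unknown"
//
// Note that the synchronize() call below is a legacy API; Apple's documentation marks it
// as unnecessary on modern iOS, though calling it is harmless here.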
+ UserDefaults.standard.synchronize() - return true - } - - func applicationDidBecomeActive(_ application: UIApplication) { - NotificationCenter.default.post(name: .settingsChanged, object: nil) - } + return true + } + + func applicationDidBecomeActive(_ application: UIApplication) { + NotificationCenter.default.post(name: .settingsChanged, object: nil) + } } extension Notification.Name { - static let settingsChanged = Notification.Name("settingsChanged") + static let settingsChanged = Notification.Name("settingsChanged") } /// Extension to CALayer to add functionality for generating screenshots of any layer. extension CALayer { - var screenShot: UIImage? { - // Begin a new image context, using the device's screen scale to ensure high-resolution output. - UIGraphicsBeginImageContextWithOptions(frame.size, false, UIScreen.main.scale) - defer { - UIGraphicsEndImageContext() - } // Ensure the image context is cleaned up correctly. + var screenShot: UIImage? { + // Begin a new image context, using the device's screen scale to ensure high-resolution output. + UIGraphicsBeginImageContextWithOptions(frame.size, false, UIScreen.main.scale) + defer { + UIGraphicsEndImageContext() + } // Ensure the image context is cleaned up correctly. - if let context = UIGraphicsGetCurrentContext() { - // Render the layer into the current context. - render(in: context) - // Attempt to generate an image from the current context. - return UIGraphicsGetImageFromCurrentImageContext() - } - return nil // Return nil if the operation fails. + if let context = UIGraphicsGetCurrentContext() { + // Render the layer into the current context. + render(in: context) + // Attempt to generate an image from the current context. + return UIGraphicsGetImageFromCurrentImageContext() } + return nil // Return nil if the operation fails. + } } diff --git a/YOLO/Utilities/BoundingBoxView.swift b/YOLO/Utilities/BoundingBoxView.swift index dfb758e..c743e44 100644 --- a/YOLO/Utilities/BoundingBoxView.swift +++ b/YOLO/Utilities/BoundingBoxView.swift @@ -14,92 +14,95 @@ import UIKit /// Manages the visualization of bounding boxes and associated labels for object detection results. class BoundingBoxView { - /// The layer that draws the bounding box around a detected object. - let shapeLayer: CAShapeLayer + /// The layer that draws the bounding box around a detected object. + let shapeLayer: CAShapeLayer - /// The layer that displays the label and confidence score for the detected object. - let textLayer: CATextLayer + /// The layer that displays the label and confidence score for the detected object. + let textLayer: CATextLayer - /// The layer that displays the inner text within the bounding box. - let innerTextLayer: CATextLayer + /// The layer that displays the inner text within the bounding box. + let innerTextLayer: CATextLayer - /// Initializes a new BoundingBoxView with configured shape and text layers. - init() { - shapeLayer = CAShapeLayer() - shapeLayer.fillColor = UIColor.clear.cgColor // No fill to only show the bounding outline - shapeLayer.lineWidth = 4 // Set the stroke line width - shapeLayer.isHidden = true // Initially hidden; shown when a detection occurs + /// Initializes a new BoundingBoxView with configured shape and text layers. 
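// Editor's aside (not part of the original patch): a typical way to drive this class is
// to pre-allocate a pool of views once, attach them all to the camera preview layer, and
// then call show(...)/hide() per frame. A minimal sketch — the pool size of 100 is an
// assumption for illustration, not something defined in this file:
//
//   var boundingBoxViews = [BoundingBoxView]()
//   for _ in 0..<100 { boundingBoxViews.append(BoundingBoxView()) }
//   boundingBoxViews.forEach { $0.addToLayer(videoPreview.layer) }
//   // Then, for each detection i:
//   //   boundingBoxViews[i].show(frame: rect, label: "person 0.87", color: .red,
//   //                            alpha: 0.9, innerTexts: "")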
+ init() { + shapeLayer = CAShapeLayer() + shapeLayer.fillColor = UIColor.clear.cgColor // No fill to only show the bounding outline + shapeLayer.lineWidth = 4 // Set the stroke line width + shapeLayer.isHidden = true // Initially hidden; shown when a detection occurs - textLayer = CATextLayer() - textLayer.isHidden = true // Initially hidden; shown with label when a detection occurs - textLayer.contentsScale = UIScreen.main.scale // Ensure the text is sharp on retina displays - textLayer.fontSize = 14 // Set font size for the label text - textLayer.font = UIFont(name: "Avenir", size: textLayer.fontSize) // Use Avenir font for labels - textLayer.alignmentMode = .center // Center-align the text within the layer + textLayer = CATextLayer() + textLayer.isHidden = true // Initially hidden; shown with label when a detection occurs + textLayer.contentsScale = UIScreen.main.scale // Ensure the text is sharp on retina displays + textLayer.fontSize = 14 // Set font size for the label text + textLayer.font = UIFont(name: "Avenir", size: textLayer.fontSize) // Use Avenir font for labels + textLayer.alignmentMode = .center // Center-align the text within the layer - innerTextLayer = CATextLayer() - innerTextLayer.isHidden = true // Initially hidden; shown with label when a detection occurs - innerTextLayer.contentsScale = UIScreen.main.scale // Ensure the text is sharp on retina displays - innerTextLayer.fontSize = 12 // Set font size for the inner text - innerTextLayer.font = UIFont(name: "Avenir", size: innerTextLayer.fontSize) // Use Avenir font for inner text - innerTextLayer.alignmentMode = .left // Left-align the text within the layer - innerTextLayer.isWrapped = true // Wrap the text to fit within the layer - } - - /// Adds the bounding box, text, and inner text layers to a specified parent layer. - /// - Parameter parent: The CALayer to which the bounding box, text, and inner text layers will be added. - func addToLayer(_ parent: CALayer) { - parent.addSublayer(shapeLayer) - parent.addSublayer(textLayer) - parent.addSublayer(innerTextLayer) - } + innerTextLayer = CATextLayer() + innerTextLayer.isHidden = true // Initially hidden; shown with label when a detection occurs + innerTextLayer.contentsScale = UIScreen.main.scale // Ensure the text is sharp on retina displays + innerTextLayer.fontSize = 12 // Set font size for the inner text + innerTextLayer.font = UIFont(name: "Avenir", size: innerTextLayer.fontSize) // Use Avenir font for inner text + innerTextLayer.alignmentMode = .left // Left-align the text within the layer + innerTextLayer.isWrapped = true // Wrap the text to fit within the layer + } - /// Updates the bounding box, label, and inner text to be visible with specified properties. - /// - Parameters: - /// - frame: The CGRect frame defining the bounding box's size and position. - /// - label: The text label to display (e.g., object class and confidence). - /// - color: The color of the bounding box stroke and label background. - /// - alpha: The opacity level for the bounding box stroke and label background. - /// - innerTexts: The text to display inside the bounding box. - func show(frame: CGRect, label: String, color: UIColor, alpha: CGFloat, innerTexts: String) { - CATransaction.setDisableActions(true) // Disable implicit animations + /// Adds the bounding box, text, and inner text layers to a specified parent layer. + /// - Parameter parent: The CALayer to which the bounding box, text, and inner text layers will be added. 
+ func addToLayer(_ parent: CALayer) { + parent.addSublayer(shapeLayer) + parent.addSublayer(textLayer) + parent.addSublayer(innerTextLayer) + } - let path = UIBezierPath(roundedRect: frame, cornerRadius: 6.0) // Rounded rectangle for the bounding box - shapeLayer.path = path.cgPath - shapeLayer.strokeColor = color.withAlphaComponent(alpha).cgColor // Apply color and alpha to the stroke - shapeLayer.isHidden = false // Make the shape layer visible + /// Updates the bounding box, label, and inner text to be visible with specified properties. + /// - Parameters: + /// - frame: The CGRect frame defining the bounding box's size and position. + /// - label: The text label to display (e.g., object class and confidence). + /// - color: The color of the bounding box stroke and label background. + /// - alpha: The opacity level for the bounding box stroke and label background. + /// - innerTexts: The text to display inside the bounding box. + func show(frame: CGRect, label: String, color: UIColor, alpha: CGFloat, innerTexts: String) { + CATransaction.setDisableActions(true) // Disable implicit animations - textLayer.string = label // Set the label text - textLayer.backgroundColor = color.withAlphaComponent(alpha).cgColor // Apply color and alpha to the background - textLayer.isHidden = false // Make the text layer visible - textLayer.foregroundColor = UIColor.white.withAlphaComponent(alpha).cgColor // Set text color + let path = UIBezierPath(roundedRect: frame, cornerRadius: 6.0) // Rounded rectangle for the bounding box + shapeLayer.path = path.cgPath + shapeLayer.strokeColor = color.withAlphaComponent(alpha).cgColor // Apply color and alpha to the stroke + shapeLayer.isHidden = false // Make the shape layer visible - // Calculate the text size and position based on the label content - let attributes = [NSAttributedString.Key.font: textLayer.font as Any] - let textRect = label.boundingRect(with: CGSize(width: 400, height: 100), - options: .truncatesLastVisibleLine, - attributes: attributes, context: nil) - let textSize = CGSize(width: textRect.width + 12, height: textRect.height) // Add padding to the text size - let textOrigin = CGPoint(x: frame.origin.x - 2, y: frame.origin.y - textSize.height - 2) // Position above the bounding box - textLayer.frame = CGRect(origin: textOrigin, size: textSize) // Set the text layer frame + textLayer.string = label // Set the label text + textLayer.backgroundColor = color.withAlphaComponent(alpha).cgColor // Apply color and alpha to the background + textLayer.isHidden = false // Make the text layer visible + textLayer.foregroundColor = UIColor.white.withAlphaComponent(alpha).cgColor // Set text color - if !innerTexts.isEmpty { - innerTextLayer.string = innerTexts // Set the inner text - innerTextLayer.backgroundColor = UIColor.clear.cgColor // No background color - innerTextLayer.isHidden = false // Make the inner text layer visible - innerTextLayer.foregroundColor = UIColor.red.cgColor // Set text color - innerTextLayer.frame = CGRect(x: frame.origin.x + 4, y: frame.origin.y + 4, width: frame.width / 2 - 8, height: frame.height - 8) - // Set the inner text layer frame - } else { - innerTextLayer.isHidden = true // Hide the inner text layer if innerTexts is empty - } + // Calculate the text size and position based on the label content + let attributes = [NSAttributedString.Key.font: textLayer.font as Any] + let textRect = label.boundingRect( + with: CGSize(width: 400, height: 100), + options: .truncatesLastVisibleLine, + attributes: attributes, context: nil) + 
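// Editor's note: boundingRect(with:options:attributes:context:) above measures the label
// text against a generous 400×100 limit; the +12 points of width padding added below
// leave roughly 6 points of margin on either side of the center-aligned text.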
let textSize = CGSize(width: textRect.width + 12, height: textRect.height) // Add padding to the text size + let textOrigin = CGPoint(x: frame.origin.x - 2, y: frame.origin.y - textSize.height - 2) // Position above the bounding box + textLayer.frame = CGRect(origin: textOrigin, size: textSize) // Set the text layer frame + if !innerTexts.isEmpty { + innerTextLayer.string = innerTexts // Set the inner text + innerTextLayer.backgroundColor = UIColor.clear.cgColor // No background color + innerTextLayer.isHidden = false // Make the inner text layer visible + innerTextLayer.foregroundColor = UIColor.red.cgColor // Set text color + innerTextLayer.frame = CGRect( + x: frame.origin.x + 4, y: frame.origin.y + 4, width: frame.width / 2 - 8, + height: frame.height - 8) + // Set the inner text layer frame + } else { + innerTextLayer.isHidden = true // Hide the inner text layer if innerTexts is empty } - /// Hides the bounding box, text, and inner text layers. - func hide() { - shapeLayer.isHidden = true - textLayer.isHidden = true - innerTextLayer.isHidden = true - } + } + + /// Hides the bounding box, text, and inner text layers. + func hide() { + shapeLayer.isHidden = true + textLayer.isHidden = true + innerTextLayer.isHidden = true + } } diff --git a/YOLO/Utilities/HumanModel.swift b/YOLO/Utilities/HumanModel.swift index be77ffb..47c4325 100644 --- a/YOLO/Utilities/HumanModel.swift +++ b/YOLO/Utilities/HumanModel.swift @@ -5,163 +5,164 @@ // This struct automatically analyzes the boxes, scores, and feature values ​​provided to the update function to create a human model.// Licensed under AGPL-3.0. For commercial use, refer to Ultralytics licensing: https://ultralytics.com/license // Access the source code: https://github.com/ultralytics/yolo-ios-app - import Foundation import UIKit let updateFrequency: Int = 120 struct Person { - var index: Int - var box: CGRect = .zero - - var score: Float = 0 - var weight: Float = 0 - var height: Float = 0 - - var age: Int = 0 - - var gender: String = "female" - var genderConfidence: Float = 0 - var race: String = "asian" - var raceConfidence: Float = 0 - - var listCount: Int = 0 - var scoreRawList: [Float] = [] - var weightRawList: [Float] = [] - var heightRawList: [Float] = [] - var ageRawList: [Float] = [] - var maleRawList: [Float] = [] - var femaleRawList: [Float] = [] - var asianRawList: [Float] = [] - var whiteRawList: [Float] = [] - var middleEasternRawList: [Float] = [] - var indianRawList: [Float] = [] - var latinoRawList: [Float] = [] - var blackRawList: [Float] = [] - - var trackedBox: CGRect? 
- var color:UIColor - - var unDetectedCounter: Int = 0 - var stable = false - - init(index: Int) { - self.index = index - self.color = UIColor(red: CGFloat.random(in: 0...1), - green: CGFloat.random(in: 0...1), - blue: CGFloat.random(in: 0...1), - alpha: 0.6) - } - - mutating func update(box:CGRect, score:Float, features:[Float]) { - self.box = box - if scoreRawList.count >= updateFrequency { - scoreRawList.removeFirst() - weightRawList.removeFirst() - heightRawList.removeFirst() - ageRawList.removeFirst() - maleRawList.removeFirst() - femaleRawList.removeFirst() - asianRawList.removeFirst() - whiteRawList.removeFirst() - middleEasternRawList.removeFirst() - indianRawList.removeFirst() - latinoRawList.removeFirst() - blackRawList.removeFirst() - } - - - self.scoreRawList.append(score) - self.weightRawList.append(Float(applyPolyModel(weight: Double(features[0])))) - self.heightRawList.append(features[1]) - self.ageRawList.append(features[2]) - self.femaleRawList.append(features[3]) - self.maleRawList.append(features[4]) - self.asianRawList.append(features[5]) - self.whiteRawList.append(features[6]) - self.middleEasternRawList.append(features[7]) - self.indianRawList.append(features[8]) - self.latinoRawList.append(features[9]) - self.blackRawList.append(features[10]) - calcurateFeatures() - - self.unDetectedCounter = 0 - } - - private mutating func calcurateFeatures() { - - self.score = average(of: scoreRawList) - self.weight = average(of: weightRawList) - self.height = average(of: heightRawList) - self.age = Int(round(average(of: ageRawList))) - let femaleAverage = average(of: femaleRawList) - let maleAverage = average(of: maleRawList) - let genderCandidates = [femaleAverage,maleAverage] - var genderMaxIndex = 0 - var genderMaxValue = genderCandidates[0] - - for (genderIndex, genderValue) in genderCandidates.dropFirst().enumerated() { - if genderValue > genderMaxValue { - genderMaxValue = genderValue - genderMaxIndex = genderIndex + 1 - } - } - - self.gender = genders[genderMaxIndex] - self.genderConfidence = genderMaxValue - - let asianAverage = average(of: asianRawList) - let whiteAverage = average(of: whiteRawList) - let middleEasternAverage = average(of: middleEasternRawList) - let indianAverage = average(of: indianRawList) - let latinoAverage = average(of: latinoRawList) - let blackAverage = average(of: blackRawList) - - let raceCandidates = [asianAverage,whiteAverage,middleEasternAverage,indianAverage,latinoAverage,blackAverage] - var raceMaxIndex = 0 - var raceMaxValue = raceCandidates[0] - - for (raceIndex, raceValue) in raceCandidates.dropFirst().enumerated() { - if raceValue > raceMaxValue { - raceMaxValue = raceValue - raceMaxIndex = raceIndex + 1 - } - } - self.race = races[raceMaxIndex] - self.raceConfidence = raceMaxValue + var index: Int + var box: CGRect = .zero + + var score: Float = 0 + var weight: Float = 0 + var height: Float = 0 + + var age: Int = 0 + + var gender: String = "female" + var genderConfidence: Float = 0 + var race: String = "asian" + var raceConfidence: Float = 0 + + var listCount: Int = 0 + var scoreRawList: [Float] = [] + var weightRawList: [Float] = [] + var heightRawList: [Float] = [] + var ageRawList: [Float] = [] + var maleRawList: [Float] = [] + var femaleRawList: [Float] = [] + var asianRawList: [Float] = [] + var whiteRawList: [Float] = [] + var middleEasternRawList: [Float] = [] + var indianRawList: [Float] = [] + var latinoRawList: [Float] = [] + var blackRawList: [Float] = [] + + var trackedBox: CGRect? 
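// Editor's aside (not part of the original patch): the *RawList arrays above form a
// rolling window — update(box:score:features:) below drops the oldest sample once a list
// holds `updateFrequency` (120) entries, so every derived attribute is an average over at
// most the last 120 frames. The same windowing idea in isolation:
//
//   var window: [Float] = []
//   func push(_ value: Float, limit: Int = updateFrequency) {
//     if window.count >= limit { window.removeFirst() }  // evict the oldest sample
//     window.append(value)
//   }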
+ var color: UIColor + + var unDetectedCounter: Int = 0 + var stable = false + + init(index: Int) { + self.index = index + self.color = UIColor( + red: CGFloat.random(in: 0...1), + green: CGFloat.random(in: 0...1), + blue: CGFloat.random(in: 0...1), + alpha: 0.6) + } + + mutating func update(box: CGRect, score: Float, features: [Float]) { + self.box = box + if scoreRawList.count >= updateFrequency { + scoreRawList.removeFirst() + weightRawList.removeFirst() + heightRawList.removeFirst() + ageRawList.removeFirst() + maleRawList.removeFirst() + femaleRawList.removeFirst() + asianRawList.removeFirst() + whiteRawList.removeFirst() + middleEasternRawList.removeFirst() + indianRawList.removeFirst() + latinoRawList.removeFirst() + blackRawList.removeFirst() } - - func average(of numbers: [Float]) -> Float { - guard !numbers.isEmpty else { - return 0 - } - var sum: Float = 0 - for number in numbers { - sum += number - } - return sum / Float(numbers.count) - } - - private mutating func applyPolyModel(weight: Double, degree: Int = 4) -> Double { - let coef: [Double] = [0, 44.196, -1.2491, 0.014767, -6.2096e-05] - let intercept: Double = -477.7766285997004 - let weight2 = weight * weight - let weight3 = weight2 * weight - let weight4 = weight3 * weight + self.scoreRawList.append(score) + self.weightRawList.append(Float(applyPolyModel(weight: Double(features[0])))) + self.heightRawList.append(features[1]) + self.ageRawList.append(features[2]) + self.femaleRawList.append(features[3]) + self.maleRawList.append(features[4]) + self.asianRawList.append(features[5]) + self.whiteRawList.append(features[6]) + self.middleEasternRawList.append(features[7]) + self.indianRawList.append(features[8]) + self.latinoRawList.append(features[9]) + self.blackRawList.append(features[10]) + calcurateFeatures() + + self.unDetectedCounter = 0 + } + + private mutating func calcurateFeatures() { + + self.score = average(of: scoreRawList) + self.weight = average(of: weightRawList) + self.height = average(of: heightRawList) + self.age = Int(round(average(of: ageRawList))) + let femaleAverage = average(of: femaleRawList) + let maleAverage = average(of: maleRawList) + let genderCandidates = [femaleAverage, maleAverage] + var genderMaxIndex = 0 + var genderMaxValue = genderCandidates[0] + + for (genderIndex, genderValue) in genderCandidates.dropFirst().enumerated() { + if genderValue > genderMaxValue { + genderMaxValue = genderValue + genderMaxIndex = genderIndex + 1 + } + } - let result = coef[0] - + coef[1] * weight - + coef[2] * weight2 - + coef[3] * weight3 - + coef[4] * weight4 - + intercept + self.gender = genders[genderMaxIndex] + self.genderConfidence = genderMaxValue + + let asianAverage = average(of: asianRawList) + let whiteAverage = average(of: whiteRawList) + let middleEasternAverage = average(of: middleEasternRawList) + let indianAverage = average(of: indianRawList) + let latinoAverage = average(of: latinoRawList) + let blackAverage = average(of: blackRawList) + + let raceCandidates = [ + asianAverage, whiteAverage, middleEasternAverage, indianAverage, latinoAverage, blackAverage, + ] + var raceMaxIndex = 0 + var raceMaxValue = raceCandidates[0] + + for (raceIndex, raceValue) in raceCandidates.dropFirst().enumerated() { + if raceValue > raceMaxValue { + raceMaxValue = raceValue + raceMaxIndex = raceIndex + 1 + } + } + self.race = races[raceMaxIndex] + self.raceConfidence = raceMaxValue + } - return result + func average(of numbers: [Float]) -> Float { + guard !numbers.isEmpty else { + return 0 } + var sum: Float = 0 + 
for number in numbers { + sum += number + } + return sum / Float(numbers.count) + } + + private mutating func applyPolyModel(weight: Double, degree: Int = 4) -> Double { + let coef: [Double] = [0, 44.196, -1.2491, 0.014767, -6.2096e-05] + let intercept: Double = -477.7766285997004 + + let weight2 = weight * weight + let weight3 = weight2 * weight + let weight4 = weight3 * weight + + let result = + coef[0] + + coef[1] * weight + + coef[2] * weight2 + + coef[3] * weight3 + + coef[4] * weight4 + + intercept + + return result + } } let genders = ["female", "male"] let races = ["asian", "white", "middle eastern", "indian", "latino", "black"] - diff --git a/YOLO/Utilities/PostProcessing.swift b/YOLO/Utilities/PostProcessing.swift index db25d12..d84e14a 100644 --- a/YOLO/Utilities/PostProcessing.swift +++ b/YOLO/Utilities/PostProcessing.swift @@ -6,98 +6,98 @@ // Licensed under AGPL-3.0. For commercial use, refer to Ultralytics licensing: https://ultralytics.com/license // Access the source code: https://github.com/ultralytics/yolo-ios-app - -import Foundation import CoreML +import Foundation import Vision func nonMaxSuppression(boxes: [CGRect], scores: [Float], threshold: Float) -> [Int] { - let sortedIndices = scores.enumerated().sorted { $0.element > $1.element }.map { $0.offset } - var selectedIndices = [Int]() - var activeIndices = [Bool](repeating: true, count: boxes.count) + let sortedIndices = scores.enumerated().sorted { $0.element > $1.element }.map { $0.offset } + var selectedIndices = [Int]() + var activeIndices = [Bool](repeating: true, count: boxes.count) - for i in 0.. CGFloat(threshold) * min(boxes[idx].area, boxes[otherIdx].area) { - activeIndices[otherIdx] = false - } - } - } + for i in 0.. CGFloat(threshold) * min(boxes[idx].area, boxes[otherIdx].area) { + activeIndices[otherIdx] = false + } } + } } - return selectedIndices + } + return selectedIndices } // Human model's output [1,15,8400] to [(Box, Confidence, HumanFeatures)] -func PostProcessHuman(prediction: MLMultiArray, confidenceThreshold: Float, iouThreshold: Float) -> [(CGRect, Float, [Float])] { - let numAnchors = prediction.shape[2].intValue - var boxes = [CGRect]() - var scores = [Float]() - var features = [[Float]]() - let featurePointer = UnsafeMutablePointer(OpaquePointer(prediction.dataPointer)) - let lock = DispatchQueue(label: "com.example.lock") +func PostProcessHuman(prediction: MLMultiArray, confidenceThreshold: Float, iouThreshold: Float) + -> [(CGRect, Float, [Float])] +{ + let numAnchors = prediction.shape[2].intValue + var boxes = [CGRect]() + var scores = [Float]() + var features = [[Float]]() + let featurePointer = UnsafeMutablePointer(OpaquePointer(prediction.dataPointer)) + let lock = DispatchQueue(label: "com.example.lock") - DispatchQueue.concurrentPerform(iterations: numAnchors) { j in - let confIndex = 4 * numAnchors + j - let confidence = featurePointer[confIndex] - if confidence > confidenceThreshold { - let x = featurePointer[j] - let y = featurePointer[numAnchors + j] - let width = featurePointer[2 * numAnchors + j] - let height = featurePointer[3 * numAnchors + j] - - let boxWidth = CGFloat(width) - let boxHeight = CGFloat(height) - let boxX = CGFloat(x - width / 2) - let boxY = CGFloat(y - height / 2) - - let boundingBox = CGRect(x: boxX, y: boxY, width: boxWidth, height: boxHeight) - - var boxFeatures = [Float](repeating: 0, count: 11) - for k in 0..<11 { - let key = (5 + k) * numAnchors + j - boxFeatures[k] = featurePointer[key] - } - - lock.sync { - boxes.append(boundingBox) - 
scores.append(confidence) - features.append(boxFeatures) - } - } - } - - let selectedIndices = nonMaxSuppression(boxes: boxes, scores: scores, threshold: iouThreshold) - var selectedBoxesAndFeatures = [(CGRect, Float, [Float])]() - - for idx in selectedIndices { - selectedBoxesAndFeatures.append((boxes[idx], scores[idx], features[idx])) + DispatchQueue.concurrentPerform(iterations: numAnchors) { j in + let confIndex = 4 * numAnchors + j + let confidence = featurePointer[confIndex] + if confidence > confidenceThreshold { + let x = featurePointer[j] + let y = featurePointer[numAnchors + j] + let width = featurePointer[2 * numAnchors + j] + let height = featurePointer[3 * numAnchors + j] + + let boxWidth = CGFloat(width) + let boxHeight = CGFloat(height) + let boxX = CGFloat(x - width / 2) + let boxY = CGFloat(y - height / 2) + + let boundingBox = CGRect(x: boxX, y: boxY, width: boxWidth, height: boxHeight) + + var boxFeatures = [Float](repeating: 0, count: 11) + for k in 0..<11 { + let key = (5 + k) * numAnchors + j + boxFeatures[k] = featurePointer[key] + } + + lock.sync { + boxes.append(boundingBox) + scores.append(confidence) + features.append(boxFeatures) + } } - print(selectedBoxesAndFeatures) - return selectedBoxesAndFeatures + } + + let selectedIndices = nonMaxSuppression(boxes: boxes, scores: scores, threshold: iouThreshold) + var selectedBoxesAndFeatures = [(CGRect, Float, [Float])]() + + for idx in selectedIndices { + selectedBoxesAndFeatures.append((boxes[idx], scores[idx], features[idx])) + } + print(selectedBoxesAndFeatures) + return selectedBoxesAndFeatures } -func toPerson(boxesAndScoresAndFeatures:[(CGRect, Float, [Float])]) -> [Person] { - var persons = [Person]() - for detectedHuman in boxesAndScoresAndFeatures { - var person = Person(index: -1) - person.update(box: detectedHuman.0, score: detectedHuman.1, features: detectedHuman.2) - person.color = .red - persons.append(person) - } - return persons +func toPerson(boxesAndScoresAndFeatures: [(CGRect, Float, [Float])]) -> [Person] { + var persons = [Person]() + for detectedHuman in boxesAndScoresAndFeatures { + var person = Person(index: -1) + person.update(box: detectedHuman.0, score: detectedHuman.1, features: detectedHuman.2) + person.color = .red + persons.append(person) + } + return persons } extension CGRect { - var area: CGFloat { - return width * height - } + var area: CGFloat { + return width * height + } } - diff --git a/YOLO/Utilities/SaveResults.swift b/YOLO/Utilities/SaveResults.swift index c46839b..ced631d 100644 --- a/YOLO/Utilities/SaveResults.swift +++ b/YOLO/Utilities/SaveResults.swift @@ -8,40 +8,43 @@ import Foundation -let detectionHeader = "sec_day, free_space, batteryLevel ,class,confidence,box_x, box_y, box_w, box_h\n" -let humanHeader = "sec_day, free_space, battery_level ,id, confidence, box_x, box_y, box_w, box_h, weight, height, age, gender, gender_confidence, race, race_confidence \n" - -func saveDetectionResultsToCSV(detectionResults:[String], task: Task) -> URL? 
{ - var header = "" - var taskName = "" - switch task { - case .detect: - header = detectionHeader - taskName = "detection" - - case .human: - header = humanHeader - taskName = "human" - } - let formatter = DateFormatter() - formatter.dateFormat = "yyyyMMdd_HH:mm:ss" - let dateString = formatter.string(from: Date()) - let fileName = taskName + "_results_\(dateString).csv" - - let path = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0].appendingPathComponent(fileName) - - var csvText = header - - for result in detectionResults { - csvText.append(contentsOf: result) - } - - do { - try csvText.write(to: path, atomically: true, encoding: .utf8) - print("CSV file saved at: \(path)") - return path - } catch { - print("Failed to save CSV file: \(error)") - return nil - } +let detectionHeader = + "sec_day, free_space, batteryLevel ,class,confidence,box_x, box_y, box_w, box_h\n" +let humanHeader = + "sec_day, free_space, battery_level ,id, confidence, box_x, box_y, box_w, box_h, weight, height, age, gender, gender_confidence, race, race_confidence \n" + +func saveDetectionResultsToCSV(detectionResults: [String], task: Task) -> URL? { + var header = "" + var taskName = "" + switch task { + case .detect: + header = detectionHeader + taskName = "detection" + + case .human: + header = humanHeader + taskName = "human" + } + let formatter = DateFormatter() + formatter.dateFormat = "yyyyMMdd_HH:mm:ss" + let dateString = formatter.string(from: Date()) + let fileName = taskName + "_results_\(dateString).csv" + + let path = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0] + .appendingPathComponent(fileName) + + var csvText = header + + for result in detectionResults { + csvText.append(contentsOf: result) + } + + do { + try csvText.write(to: path, atomically: true, encoding: .utf8) + print("CSV file saved at: \(path)") + return path + } catch { + print("Failed to save CSV file: \(error)") + return nil + } } diff --git a/YOLO/Utilities/ThresholdProvider.swift b/YOLO/Utilities/ThresholdProvider.swift index 53702d1..22c8d6a 100644 --- a/YOLO/Utilities/ThresholdProvider.swift +++ b/YOLO/Utilities/ThresholdProvider.swift @@ -14,29 +14,29 @@ import CoreML /// Provides custom IoU and confidence thresholds for adjusting model predictions. class ThresholdProvider: MLFeatureProvider { - /// Stores IoU and confidence thresholds as MLFeatureValue objects. - var values: [String: MLFeatureValue] + /// Stores IoU and confidence thresholds as MLFeatureValue objects. + var values: [String: MLFeatureValue] - /// The set of feature names provided by this provider. - var featureNames: Set { - return Set(values.keys) - } + /// The set of feature names provided by this provider. + var featureNames: Set { + return Set(values.keys) + } - /// Initializes the provider with specified IoU and confidence thresholds. - /// - Parameters: - /// - iouThreshold: The IoU threshold for determining object overlap. - /// - confidenceThreshold: The minimum confidence for considering a detection valid. - init(iouThreshold: Double = 0.45, confidenceThreshold: Double = 0.25) { - values = [ - "iouThreshold": MLFeatureValue(double: iouThreshold), - "confidenceThreshold": MLFeatureValue(double: confidenceThreshold) - ] - } + /// Initializes the provider with specified IoU and confidence thresholds. + /// - Parameters: + /// - iouThreshold: The IoU threshold for determining object overlap. + /// - confidenceThreshold: The minimum confidence for considering a detection valid. 
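// Editor's aside (not part of the original patch): a ThresholdProvider only has an effect
// on models exported with "iouThreshold" and "confidenceThreshold" inputs, as Ultralytics
// NMS-bundled Core ML exports are. A hedged usage sketch — `detector` is assumed to be
// the app's VNCoreMLModel instance:
//
//   detector.featureProvider = ThresholdProvider(iouThreshold: 0.5,
//                                                confidenceThreshold: 0.3)
//
// Vision then supplies both values to the model on every request.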
+ init(iouThreshold: Double = 0.45, confidenceThreshold: Double = 0.25) { + values = [ + "iouThreshold": MLFeatureValue(double: iouThreshold), + "confidenceThreshold": MLFeatureValue(double: confidenceThreshold), + ] + } - /// Returns the feature value for the given feature name. - /// - Parameter featureName: The name of the feature. - /// - Returns: The MLFeatureValue object corresponding to the feature name. - func featureValue(for featureName: String) -> MLFeatureValue? { - return values[featureName] - } + /// Returns the feature value for the given feature name. + /// - Parameter featureName: The name of the feature. + /// - Returns: The MLFeatureValue object corresponding to the feature name. + func featureValue(for featureName: String) -> MLFeatureValue? { + return values[featureName] + } } diff --git a/YOLO/Utilities/TrackingModel.swift b/YOLO/Utilities/TrackingModel.swift index a4f5dc5..1beedfa 100644 --- a/YOLO/Utilities/TrackingModel.swift +++ b/YOLO/Utilities/TrackingModel.swift @@ -6,121 +6,123 @@ // The tack function is a simple tracking algorithm that tracks boxes of the same person based on box overlap across frames. // Access the source code: https://github.com/ultralytics/yolo-ios-app +import Accelerate import Foundation import Vision -import Accelerate class TrackingModel { - var persons = [Person]() - var personIndex:Int = 0 - var recent:[(CGRect, Float, [Float])] = [] - - func track(boxesAndScoresAndFeatures:[(CGRect, Float, [Float])]) -> [Person] { - - if persons.isEmpty { - for detectedHuman in boxesAndScoresAndFeatures { - var person = Person(index: personIndex) - person.update(box: detectedHuman.0, score: detectedHuman.1, features: detectedHuman.2) - personIndex += 1 - persons.append(person) - - } - return persons - } - - var unDetectedPersonIndexes:[Int] = [] - var usedDetectedIndex:Set = Set() - - for (pi, person) in persons.enumerated() { - var bestIOU:CGFloat = 0 - var bestIndex = 0 - - for (i, detected) in boxesAndScoresAndFeatures.enumerated() { - let IoU = overlapPercentage(rect1: person.box, rect2: detected.0) - if IoU > bestIOU { - bestIOU = IoU - bestIndex = i - } - } - if bestIOU >= 50 { - let detectedPerson = boxesAndScoresAndFeatures[bestIndex] - persons[pi].update(box: detectedPerson.0, score: detectedPerson.1, features: detectedPerson.2) - usedDetectedIndex.insert(bestIndex) - } else { - unDetectedPersonIndexes.append(pi) - } - } - - let sortedIndices = unDetectedPersonIndexes.sorted(by: >) - for index in sortedIndices { - persons[index].unDetectedCounter += 1 - } - - for (index, det) in boxesAndScoresAndFeatures.enumerated() { - if !usedDetectedIndex.contains(index) { - var person = Person(index: personIndex) - person.update(box: det.0, score: det.1, features: det.2) - personIndex += 1 - persons.append(person) - } - } - - persons = removeOverlappingRects(persons: persons) - - var personsToShow: [Person] = [] - var removePersonIndexes: [Int] = [] - for (pindex, person) in persons.enumerated() { - if person.unDetectedCounter == 0 { - personsToShow.append(person) - } else if person.unDetectedCounter >= 15 { - removePersonIndexes.append(pindex) - } - } - let sortedRemoveIndices = removePersonIndexes.sorted(by: >) - for index in sortedRemoveIndices { - persons.remove(at: index) + var persons = [Person]() + var personIndex: Int = 0 + var recent: [(CGRect, Float, [Float])] = [] + + func track(boxesAndScoresAndFeatures: [(CGRect, Float, [Float])]) -> [Person] { + + if persons.isEmpty { + for detectedHuman in boxesAndScoresAndFeatures { + var person = 
Person(index: personIndex) + person.update(box: detectedHuman.0, score: detectedHuman.1, features: detectedHuman.2) + personIndex += 1 + persons.append(person) + + } + return persons + } + + var unDetectedPersonIndexes: [Int] = [] + var usedDetectedIndex: Set = Set() + + for (pi, person) in persons.enumerated() { + var bestIOU: CGFloat = 0 + var bestIndex = 0 + + for (i, detected) in boxesAndScoresAndFeatures.enumerated() { + let IoU = overlapPercentage(rect1: person.box, rect2: detected.0) + if IoU > bestIOU { + bestIOU = IoU + bestIndex = i } + } + if bestIOU >= 50 { + let detectedPerson = boxesAndScoresAndFeatures[bestIndex] + persons[pi].update( + box: detectedPerson.0, score: detectedPerson.1, features: detectedPerson.2) + usedDetectedIndex.insert(bestIndex) + } else { + unDetectedPersonIndexes.append(pi) + } + } - return personsToShow + let sortedIndices = unDetectedPersonIndexes.sorted(by: >) + for index in sortedIndices { + persons[index].unDetectedCounter += 1 + } + for (index, det) in boxesAndScoresAndFeatures.enumerated() { + if !usedDetectedIndex.contains(index) { + var person = Person(index: personIndex) + person.update(box: det.0, score: det.1, features: det.2) + personIndex += 1 + persons.append(person) + } } + + persons = removeOverlappingRects(persons: persons) + + var personsToShow: [Person] = [] + var removePersonIndexes: [Int] = [] + for (pindex, person) in persons.enumerated() { + if person.unDetectedCounter == 0 { + personsToShow.append(person) + } else if person.unDetectedCounter >= 15 { + removePersonIndexes.append(pindex) + } + } + let sortedRemoveIndices = removePersonIndexes.sorted(by: >) + for index in sortedRemoveIndices { + persons.remove(at: index) + } + + return personsToShow + + } } func overlapPercentage(rect1: CGRect, rect2: CGRect) -> CGFloat { - let intersection = rect1.intersection(rect2) - - if intersection.isNull { - return 0.0 - } - - let intersectionArea = intersection.width * intersection.height - - let rect1Area = rect1.width * rect1.height - - let overlapPercentage = (intersectionArea / rect1Area) * 100 - - return overlapPercentage + let intersection = rect1.intersection(rect2) + + if intersection.isNull { + return 0.0 + } + + let intersectionArea = intersection.width * intersection.height + + let rect1Area = rect1.width * rect1.height + + let overlapPercentage = (intersectionArea / rect1Area) * 100 + + return overlapPercentage } func removeOverlappingRects(persons: [Person], threshold: CGFloat = 90.0) -> [Person] { - var filteredPersons = persons - var index = 0 - - while index < filteredPersons.count { - var shouldRemove = false - for j in (index + 1)..= threshold { - shouldRemove = true - break - } - } - if shouldRemove { - filteredPersons.remove(at: index) - } else { - index += 1 - } + var filteredPersons = persons + var index = 0 + + while index < filteredPersons.count { + var shouldRemove = false + for j in (index + 1)..= threshold { + shouldRemove = true + break + } } - - return filteredPersons + if shouldRemove { + filteredPersons.remove(at: index) + } else { + index += 1 + } + } + + return filteredPersons } diff --git a/YOLO/VideoCapture.swift b/YOLO/VideoCapture.swift index 0c9db9c..7faf8f9 100644 --- a/YOLO/VideoCapture.swift +++ b/YOLO/VideoCapture.swift @@ -11,154 +11,165 @@ // the capture session. It also provides methods to start and stop video capture and delivers captured frames // to a delegate implementing the VideoCaptureDelegate protocol. 
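// Editor's aside (not part of the original patch): a minimal consumer of this class
// conforms to VideoCaptureDelegate and receives frames on the private capture queue, not
// the main thread. A sketch (FrameConsumer is a hypothetical name):
//
//   final class FrameConsumer: VideoCaptureDelegate {
//     func videoCapture(_ capture: VideoCapture, didCaptureVideoFrame sampleBuffer: CMSampleBuffer) {
//       guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else { return }
//       _ = pixelBuffer  // hand off to Vision/Core ML here; hop to the main queue for UI work
//     }
//   }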
- import AVFoundation import CoreVideo import UIKit // Defines the protocol for handling video frame capture events. public protocol VideoCaptureDelegate: AnyObject { - func videoCapture(_ capture: VideoCapture, didCaptureVideoFrame: CMSampleBuffer) + func videoCapture(_ capture: VideoCapture, didCaptureVideoFrame: CMSampleBuffer) } // Identifies the best available camera device based on user preferences and device capabilities. func bestCaptureDevice() -> AVCaptureDevice { - if UserDefaults.standard.bool(forKey: "use_telephoto"), let device = AVCaptureDevice.default(.builtInTelephotoCamera, for: .video, position: .back) { - return device - } else if let device = AVCaptureDevice.default(.builtInDualCamera, for: .video, position: .back) { - return device - } else if let device = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .back) { - return device - } else { - fatalError("Expected back camera device is not available.") - } + if UserDefaults.standard.bool(forKey: "use_telephoto"), + let device = AVCaptureDevice.default(.builtInTelephotoCamera, for: .video, position: .back) + { + return device + } else if let device = AVCaptureDevice.default(.builtInDualCamera, for: .video, position: .back) { + return device + } else if let device = AVCaptureDevice.default( + .builtInWideAngleCamera, for: .video, position: .back) + { + return device + } else { + fatalError("Expected back camera device is not available.") + } } public class VideoCapture: NSObject { - public var previewLayer: AVCaptureVideoPreviewLayer? - public weak var delegate: VideoCaptureDelegate? - - let captureDevice = bestCaptureDevice() - let captureSession = AVCaptureSession() - let videoOutput = AVCaptureVideoDataOutput() - var cameraOutput = AVCapturePhotoOutput() - let queue = DispatchQueue(label: "camera-queue") - - // Configures the camera and capture session with optional session presets. - public func setUp(sessionPreset: AVCaptureSession.Preset = .hd1280x720, completion: @escaping (Bool) -> Void) { - queue.async { - let success = self.setUpCamera(sessionPreset: sessionPreset) - DispatchQueue.main.async { - completion(success) - } - } + public var previewLayer: AVCaptureVideoPreviewLayer? + public weak var delegate: VideoCaptureDelegate? + + let captureDevice = bestCaptureDevice() + let captureSession = AVCaptureSession() + let videoOutput = AVCaptureVideoDataOutput() + var cameraOutput = AVCapturePhotoOutput() + let queue = DispatchQueue(label: "camera-queue") + + // Configures the camera and capture session with optional session presets. + public func setUp( + sessionPreset: AVCaptureSession.Preset = .hd1280x720, completion: @escaping (Bool) -> Void + ) { + queue.async { + let success = self.setUpCamera(sessionPreset: sessionPreset) + DispatchQueue.main.async { + completion(success) + } + } + } + + // Internal method to configure camera inputs, outputs, and session properties. + private func setUpCamera(sessionPreset: AVCaptureSession.Preset) -> Bool { + captureSession.beginConfiguration() + captureSession.sessionPreset = sessionPreset + + guard let videoInput = try? AVCaptureDeviceInput(device: captureDevice) else { + return false } - // Internal method to configure camera inputs, outputs, and session properties. - private func setUpCamera(sessionPreset: AVCaptureSession.Preset) -> Bool { - captureSession.beginConfiguration() - captureSession.sessionPreset = sessionPreset - - guard let videoInput = try? 
AVCaptureDeviceInput(device: captureDevice) else { - return false - } - - if captureSession.canAddInput(videoInput) { - captureSession.addInput(videoInput) - } - - let previewLayer = AVCaptureVideoPreviewLayer(session: captureSession) - previewLayer.videoGravity = .resizeAspectFill - previewLayer.connection?.videoOrientation = .portrait - self.previewLayer = previewLayer - - let settings: [String: Any] = [ - kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA) - ] - - videoOutput.videoSettings = settings - videoOutput.alwaysDiscardsLateVideoFrames = true - videoOutput.setSampleBufferDelegate(self, queue: queue) - if captureSession.canAddOutput(videoOutput) { - captureSession.addOutput(videoOutput) - } - - if captureSession.canAddOutput(cameraOutput) { - captureSession.addOutput(cameraOutput) - } - switch UIDevice.current.orientation { - case .portrait: - videoOutput.connection(with: .video)?.videoOrientation = .portrait - case .portraitUpsideDown: - videoOutput.connection(with: .video)?.videoOrientation = .portraitUpsideDown - case .landscapeRight: - videoOutput.connection(with: .video)?.videoOrientation = .landscapeLeft - case .landscapeLeft: - videoOutput.connection(with: .video)?.videoOrientation = .landscapeRight - default: - videoOutput.connection(with: .video)?.videoOrientation = .portrait - } - - if let connection = videoOutput.connection(with: .video) { - self.previewLayer?.connection?.videoOrientation = connection.videoOrientation - } - do { - try captureDevice.lockForConfiguration() - captureDevice.focusMode = .continuousAutoFocus - captureDevice.focusPointOfInterest = CGPoint(x: 0.5, y: 0.5) - captureDevice.exposureMode = .continuousAutoExposure - captureDevice.unlockForConfiguration() - } catch { - print("Unable to configure the capture device.") - return false - } - - captureSession.commitConfiguration() - return true + if captureSession.canAddInput(videoInput) { + captureSession.addInput(videoInput) } - // Starts the video capture session. - public func start() { - if !captureSession.isRunning { - DispatchQueue.global(qos: .userInitiated).async { [weak self] in - self?.captureSession.startRunning() - } - } + let previewLayer = AVCaptureVideoPreviewLayer(session: captureSession) + previewLayer.videoGravity = .resizeAspectFill + previewLayer.connection?.videoOrientation = .portrait + self.previewLayer = previewLayer + + let settings: [String: Any] = [ + kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA) + ] + + videoOutput.videoSettings = settings + videoOutput.alwaysDiscardsLateVideoFrames = true + videoOutput.setSampleBufferDelegate(self, queue: queue) + if captureSession.canAddOutput(videoOutput) { + captureSession.addOutput(videoOutput) } - // Stops the video capture session. 
- public func stop() { - if captureSession.isRunning { - captureSession.stopRunning() - } + if captureSession.canAddOutput(cameraOutput) { + captureSession.addOutput(cameraOutput) } - - func updateVideoOrientation() { - guard let connection = videoOutput.connection(with: .video) else { return } - switch UIDevice.current.orientation { - case .portrait: - connection.videoOrientation = .portrait - case .portraitUpsideDown: - connection.videoOrientation = .portraitUpsideDown - case .landscapeRight: - connection.videoOrientation = .landscapeLeft - case .landscapeLeft: - connection.videoOrientation = .landscapeRight - default: - return - } - self.previewLayer?.connection?.videoOrientation = connection.videoOrientation + switch UIDevice.current.orientation { + case .portrait: + videoOutput.connection(with: .video)?.videoOrientation = .portrait + case .portraitUpsideDown: + videoOutput.connection(with: .video)?.videoOrientation = .portraitUpsideDown + case .landscapeRight: + videoOutput.connection(with: .video)?.videoOrientation = .landscapeLeft + case .landscapeLeft: + videoOutput.connection(with: .video)?.videoOrientation = .landscapeRight + default: + videoOutput.connection(with: .video)?.videoOrientation = .portrait } -} + if let connection = videoOutput.connection(with: .video) { + self.previewLayer?.connection?.videoOrientation = connection.videoOrientation + } + do { + try captureDevice.lockForConfiguration() + captureDevice.focusMode = .continuousAutoFocus + captureDevice.focusPointOfInterest = CGPoint(x: 0.5, y: 0.5) + captureDevice.exposureMode = .continuousAutoExposure + captureDevice.unlockForConfiguration() + } catch { + print("Unable to configure the capture device.") + return false + } -// Extension to handle AVCaptureVideoDataOutputSampleBufferDelegate events. -extension VideoCapture: AVCaptureVideoDataOutputSampleBufferDelegate { - public func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) { - delegate?.videoCapture(self, didCaptureVideoFrame: sampleBuffer) + captureSession.commitConfiguration() + return true + } + + // Starts the video capture session. + public func start() { + if !captureSession.isRunning { + DispatchQueue.global(qos: .userInitiated).async { [weak self] in + self?.captureSession.startRunning() + } } + } - public func captureOutput(_ output: AVCaptureOutput, didDrop sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) { - // Optionally handle dropped frames, e.g., due to full buffer. + // Stops the video capture session. + public func stop() { + if captureSession.isRunning { + captureSession.stopRunning() } + } + + func updateVideoOrientation() { + guard let connection = videoOutput.connection(with: .video) else { return } + switch UIDevice.current.orientation { + case .portrait: + connection.videoOrientation = .portrait + case .portraitUpsideDown: + connection.videoOrientation = .portraitUpsideDown + case .landscapeRight: + connection.videoOrientation = .landscapeLeft + case .landscapeLeft: + connection.videoOrientation = .landscapeRight + default: + return + } + self.previewLayer?.connection?.videoOrientation = connection.videoOrientation + } + +} + +// Extension to handle AVCaptureVideoDataOutputSampleBufferDelegate events. 
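// Editor's note: in updateVideoOrientation() above, .landscapeLeft and .landscapeRight
// are intentionally crossed — UIDeviceOrientation is defined relative to the device while
// AVCaptureVideoOrientation is defined relative to the camera frame, and the two enums
// are mirrored for the landscape cases.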
+extension VideoCapture: AVCaptureVideoDataOutputSampleBufferDelegate { + public func captureOutput( + _ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, + from connection: AVCaptureConnection + ) { + delegate?.videoCapture(self, didCaptureVideoFrame: sampleBuffer) + } + + public func captureOutput( + _ output: AVCaptureOutput, didDrop sampleBuffer: CMSampleBuffer, + from connection: AVCaptureConnection + ) { + // Optionally handle dropped frames, e.g., due to full buffer. + } } diff --git a/YOLO/ViewController.swift b/YOLO/ViewController.swift index c51d3a7..10886d4 100644 --- a/YOLO/ViewController.swift +++ b/YOLO/ViewController.swift @@ -12,880 +12,915 @@ // the device's camera. import AVFoundation -import CoreMedia import CoreML +import CoreMedia import UIKit import Vision var mlModel = try! yolov8m(configuration: .init()).model enum Task { - case detect - case human + case detect + case human } class ViewController: UIViewController { - @IBOutlet var videoPreview: UIView! - @IBOutlet var View0: UIView! - @IBOutlet var segmentedControl: UISegmentedControl! - @IBOutlet weak var taskSegmentControl: UISegmentedControl! - @IBOutlet weak var trackingLabel: UILabel! - @IBOutlet weak var trackingSwitch: UISwitch! - @IBOutlet var playButtonOutlet: UIBarButtonItem! - @IBOutlet var pauseButtonOutlet: UIBarButtonItem! - @IBOutlet var slider: UISlider! - @IBOutlet var sliderConf: UISlider! - @IBOutlet weak var sliderConfLandScape: UISlider! - @IBOutlet var sliderIoU: UISlider! - @IBOutlet weak var sliderIoULandScape: UISlider! - @IBOutlet weak var labelName: UILabel! - @IBOutlet weak var labelFPS: UILabel! - @IBOutlet weak var labelZoom: UILabel! - @IBOutlet weak var labelVersion: UILabel! - @IBOutlet weak var labelSlider: UILabel! - @IBOutlet weak var labelSliderConf: UILabel! - @IBOutlet weak var labelSliderConfLandScape: UILabel! - @IBOutlet weak var labelSliderIoU: UILabel! - @IBOutlet weak var labelSliderIoULandScape: UILabel! - @IBOutlet weak var activityIndicator: UIActivityIndicatorView! - - @IBOutlet weak var forcus: UIImageView! - - @IBOutlet weak var toolBar: UIToolbar! - - @IBOutlet weak var saveDataButton: UIBarButtonItem! - - let selection = UISelectionFeedbackGenerator() - var detector = try! VNCoreMLModel(for: mlModel) - var session: AVCaptureSession! - var videoCapture: VideoCapture! - var currentBuffer: CVPixelBuffer? - var framesDone = 0 - var t0 = 0.0 // inference start - var t1 = 0.0 // inference dt - var t2 = 0.0 // inference dt smoothed - var t3 = CACurrentMediaTime() // FPS start - var t4 = 0.0 // FPS dt smoothed - // var cameraOutput: AVCapturePhotoOutput! 
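// Editor's aside (not part of the original patch): the t0–t4 counters declared above (and
// re-declared in the reformatted block below) implement exponentially smoothed timing.
// The update rule — applied where inference completes, outside this hunk, and shown here
// with assumed smoothing factors — has the form:
//
//   t2 = t1 * 0.05 + t2 * 0.95             // smoothed inference time
//   t4 = (now - t3) * 0.05 + t4 * 0.95     // smoothed frame interval; FPS ≈ 1 / t4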
- - // Developer mode - var developerMode = UserDefaults.standard.bool(forKey: "developer_mode") // developer mode selected in settings - var save_detections = false // write every detection to detections.txt - let save_frames = false // write every frame to frames.txt - var save_strings:[String] = [] - let saveQueue = DispatchQueue(label: "com.ultralytics.saveQueue") - - lazy var visionRequest: VNCoreMLRequest = { - let request = VNCoreMLRequest(model: detector, completionHandler: { - [weak self] request, error in - self?.processObservations(for: request, error: error) - }) - // NOTE: BoundingBoxView object scaling depends on request.imageCropAndScaleOption https://developer.apple.com/documentation/vision/vnimagecropandscaleoption - request.imageCropAndScaleOption = .scaleFill // .scaleFit, .scaleFill, .centerCrop - return request - }() - - var task: Task = .detect - var confidenceThreshold:Float = 0.25 - var iouThreshold:Float = 0.4 - var tracking = false - var tracker = TrackingModel() - - override func viewDidLoad() { - super.viewDidLoad() - NotificationCenter.default.addObserver(self, selector: #selector(updateDeveloperMode), name: .settingsChanged, object: nil) - slider.value = 30 - taskSegmentControl.selectedSegmentIndex = 0 - setLabels() - setUpBoundingBoxViews() - setUpOrientationChangeNotification() - startVideo() - // setModel() - } - - @objc func updateDeveloperMode() { - let userDefaults = UserDefaults.standard - developerMode = userDefaults.bool(forKey: "developer_mode") - if !developerMode { - save_detections = false - saveDataButton.isEnabled = false - saveDataButton.tintColor = UIColor.clear - } else { - saveDataButton.isEnabled = true - saveDataButton.tintColor = nil - } - } - - override func viewWillDisappear(_ animated: Bool) { - super.viewWillDisappear(animated) - if developerMode { - save_strings = [] - save_detections = false - saveDataButton.isEnabled = true - saveDataButton.tintColor = nil - } - } + @IBOutlet var videoPreview: UIView! + @IBOutlet var View0: UIView! + @IBOutlet var segmentedControl: UISegmentedControl! + @IBOutlet weak var taskSegmentControl: UISegmentedControl! + @IBOutlet weak var trackingLabel: UILabel! + @IBOutlet weak var trackingSwitch: UISwitch! + @IBOutlet var playButtonOutlet: UIBarButtonItem! + @IBOutlet var pauseButtonOutlet: UIBarButtonItem! + @IBOutlet var slider: UISlider! + @IBOutlet var sliderConf: UISlider! + @IBOutlet weak var sliderConfLandScape: UISlider! + @IBOutlet var sliderIoU: UISlider! + @IBOutlet weak var sliderIoULandScape: UISlider! + @IBOutlet weak var labelName: UILabel! + @IBOutlet weak var labelFPS: UILabel! + @IBOutlet weak var labelZoom: UILabel! + @IBOutlet weak var labelVersion: UILabel! + @IBOutlet weak var labelSlider: UILabel! + @IBOutlet weak var labelSliderConf: UILabel! + @IBOutlet weak var labelSliderConfLandScape: UILabel! + @IBOutlet weak var labelSliderIoU: UILabel! + @IBOutlet weak var labelSliderIoULandScape: UILabel! + @IBOutlet weak var activityIndicator: UIActivityIndicatorView! 
- deinit { - NotificationCenter.default.removeObserver(self, name: .settingsChanged, object: nil) - } - - override func viewWillTransition(to size: CGSize, with coordinator: any UIViewControllerTransitionCoordinator) { - super.viewWillTransition(to: size, with: coordinator) - - if size.width > size.height { - labelSliderConf.isHidden = true - sliderConf.isHidden = true - labelSliderIoU.isHidden = true - sliderIoU.isHidden = true - toolBar.setBackgroundImage(UIImage(), forToolbarPosition: .any, barMetrics: .default) - toolBar.setShadowImage(UIImage(), forToolbarPosition: .any) - - labelSliderConfLandScape.isHidden = false - sliderConfLandScape.isHidden = false - labelSliderIoULandScape.isHidden = false - sliderIoULandScape.isHidden = false + @IBOutlet weak var forcus: UIImageView! - } else { - labelSliderConf.isHidden = false - sliderConf.isHidden = false - labelSliderIoU.isHidden = false - sliderIoU.isHidden = false - toolBar.setBackgroundImage(nil, forToolbarPosition: .any, barMetrics: .default) - toolBar.setShadowImage(nil, forToolbarPosition: .any) - - labelSliderConfLandScape.isHidden = true - sliderConfLandScape.isHidden = true - labelSliderIoULandScape.isHidden = true - sliderIoULandScape.isHidden = true - } - self.videoCapture.previewLayer?.frame = CGRect(x: 0, y: 0, width: size.width, height: size.height) + @IBOutlet weak var toolBar: UIToolbar! - } - - private func setUpOrientationChangeNotification() { - NotificationCenter.default.addObserver(self, selector: #selector(orientationDidChange), name: UIDevice.orientationDidChangeNotification, object: nil) - } - - @objc func orientationDidChange() { - videoCapture.updateVideoOrientation() + @IBOutlet weak var saveDataButton: UIBarButtonItem! + let selection = UISelectionFeedbackGenerator() + var detector = try! VNCoreMLModel(for: mlModel) + var session: AVCaptureSession! + var videoCapture: VideoCapture! + var currentBuffer: CVPixelBuffer? + var framesDone = 0 + var t0 = 0.0 // inference start + var t1 = 0.0 // inference dt + var t2 = 0.0 // inference dt smoothed + var t3 = CACurrentMediaTime() // FPS start + var t4 = 0.0 // FPS dt smoothed + // var cameraOutput: AVCapturePhotoOutput! 
+ + // Developer mode + var developerMode = UserDefaults.standard.bool(forKey: "developer_mode") // developer mode selected in settings + var save_detections = false // write every detection to detections.txt + let save_frames = false // write every frame to frames.txt + var save_strings: [String] = [] + let saveQueue = DispatchQueue(label: "com.ultralytics.saveQueue") + + lazy var visionRequest: VNCoreMLRequest = { + let request = VNCoreMLRequest( + model: detector, + completionHandler: { + [weak self] request, error in + self?.processObservations(for: request, error: error) + }) + // NOTE: BoundingBoxView object scaling depends on request.imageCropAndScaleOption https://developer.apple.com/documentation/vision/vnimagecropandscaleoption + request.imageCropAndScaleOption = .scaleFill // .scaleFit, .scaleFill, .centerCrop + return request + }() + + var task: Task = .detect + var confidenceThreshold: Float = 0.25 + var iouThreshold: Float = 0.4 + var tracking = false + var tracker = TrackingModel() + + override func viewDidLoad() { + super.viewDidLoad() + NotificationCenter.default.addObserver( + self, selector: #selector(updateDeveloperMode), name: .settingsChanged, object: nil) + slider.value = 30 + taskSegmentControl.selectedSegmentIndex = 0 + setLabels() + setUpBoundingBoxViews() + setUpOrientationChangeNotification() + startVideo() + // setModel() + } + + @objc func updateDeveloperMode() { + let userDefaults = UserDefaults.standard + developerMode = userDefaults.bool(forKey: "developer_mode") + if !developerMode { + save_detections = false + saveDataButton.isEnabled = false + saveDataButton.tintColor = UIColor.clear + } else { + saveDataButton.isEnabled = true + saveDataButton.tintColor = nil + } + } + + override func viewWillDisappear(_ animated: Bool) { + super.viewWillDisappear(animated) + if developerMode { + save_strings = [] + save_detections = false + saveDataButton.isEnabled = true + saveDataButton.tintColor = nil + } + } + + deinit { + NotificationCenter.default.removeObserver(self, name: .settingsChanged, object: nil) + } + + override func viewWillTransition( + to size: CGSize, with coordinator: any UIViewControllerTransitionCoordinator + ) { + super.viewWillTransition(to: size, with: coordinator) + + if size.width > size.height { + labelSliderConf.isHidden = true + sliderConf.isHidden = true + labelSliderIoU.isHidden = true + sliderIoU.isHidden = true + toolBar.setBackgroundImage(UIImage(), forToolbarPosition: .any, barMetrics: .default) + toolBar.setShadowImage(UIImage(), forToolbarPosition: .any) + + labelSliderConfLandScape.isHidden = false + sliderConfLandScape.isHidden = false + labelSliderIoULandScape.isHidden = false + sliderIoULandScape.isHidden = false + + } else { + labelSliderConf.isHidden = false + sliderConf.isHidden = false + labelSliderIoU.isHidden = false + sliderIoU.isHidden = false + toolBar.setBackgroundImage(nil, forToolbarPosition: .any, barMetrics: .default) + toolBar.setShadowImage(nil, forToolbarPosition: .any) + + labelSliderConfLandScape.isHidden = true + sliderConfLandScape.isHidden = true + labelSliderIoULandScape.isHidden = true + sliderIoULandScape.isHidden = true } - - @IBAction func vibrate(_ sender: Any) { - selection.selectionChanged() - } - - @IBAction func indexChanged(_ sender: Any) { - selection.selectionChanged() - activityIndicator.startAnimating() - setModel() - setUpBoundingBoxViews() - activityIndicator.stopAnimating() - } - - func setModel() { - - /// Switch model - switch task { - case .detect: - switch 
segmentedControl.selectedSegmentIndex { - case 0: - self.labelName.text = "YOLOv8n" - mlModel = try! yolov8n(configuration: .init()).model - case 1: - self.labelName.text = "YOLOv8s" - mlModel = try! yolov8s(configuration: .init()).model - case 2: - self.labelName.text = "YOLOv8m" - mlModel = try! yolov8m(configuration: .init()).model - case 3: - self.labelName.text = "YOLOv8l" - mlModel = try! yolov8l(configuration: .init()).model - case 4: - self.labelName.text = "YOLOv8x" - mlModel = try! yolov8x(configuration: .init()).model - default: - break - } - case .human: - switch segmentedControl.selectedSegmentIndex { - case 0: - self.labelName.text = "YOLOv8n" - if #available(iOS 15.0, *) { - mlModel = try! yolov8n_human(configuration: .init()).model - } else { - // Fallback on earlier versions - } - case 1: - self.labelName.text = "YOLOv8s" - if #available(iOS 15.0, *) { - mlModel = try! yolov8s_human(configuration: .init()).model - } else { - // Fallback on earlier versions - } - case 2: - self.labelName.text = "YOLOv8m" - if #available(iOS 15.0, *) { - mlModel = try! yolov8m_human(configuration: .init()).model - } else { - // Fallback on earlier versions - } - case 3: - self.labelName.text = "YOLOv8l" - if #available(iOS 15.0, *) { - mlModel = try! yolov8l_human(configuration: .init()).model - } else { - // Fallback on earlier versions - } - case 4: - self.labelName.text = "YOLOv8x" - if #available(iOS 15.0, *) { - mlModel = try! yolov8x_human(configuration: .init()).model - } else { - // Fallback on earlier versions - } - default: - break - } - + self.videoCapture.previewLayer?.frame = CGRect( + x: 0, y: 0, width: size.width, height: size.height) + + } + + private func setUpOrientationChangeNotification() { + NotificationCenter.default.addObserver( + self, selector: #selector(orientationDidChange), + name: UIDevice.orientationDidChangeNotification, object: nil) + } + + @objc func orientationDidChange() { + videoCapture.updateVideoOrientation() + + } + + @IBAction func vibrate(_ sender: Any) { + selection.selectionChanged() + } + + @IBAction func indexChanged(_ sender: Any) { + selection.selectionChanged() + activityIndicator.startAnimating() + setModel() + setUpBoundingBoxViews() + activityIndicator.stopAnimating() + } + + func setModel() { + + /// Switch model + switch task { + case .detect: + switch segmentedControl.selectedSegmentIndex { + case 0: + self.labelName.text = "YOLOv8n" + mlModel = try! yolov8n(configuration: .init()).model + case 1: + self.labelName.text = "YOLOv8s" + mlModel = try! yolov8s(configuration: .init()).model + case 2: + self.labelName.text = "YOLOv8m" + mlModel = try! yolov8m(configuration: .init()).model + case 3: + self.labelName.text = "YOLOv8l" + mlModel = try! yolov8l(configuration: .init()).model + case 4: + self.labelName.text = "YOLOv8x" + mlModel = try! yolov8x(configuration: .init()).model + default: + break + } + case .human: + switch segmentedControl.selectedSegmentIndex { + case 0: + self.labelName.text = "YOLOv8n" + if #available(iOS 15.0, *) { + mlModel = try! yolov8n_human(configuration: .init()).model + } else { + // Fallback on earlier versions } - DispatchQueue.global(qos: .userInitiated).async { [self] in - - /// VNCoreMLModel - detector = try! 
VNCoreMLModel(for: mlModel)
-            detector.featureProvider = ThresholdProvider()
-
-            /// VNCoreMLRequest
-            let request = VNCoreMLRequest(model: detector, completionHandler: { [weak self] request, error in
-                self?.processObservations(for: request, error: error)
-            })
-            request.imageCropAndScaleOption = .scaleFill  // .scaleFit, .scaleFill, .centerCrop
-            visionRequest = request
-            t2 = 0.0  // inference dt smoothed
-            t3 = CACurrentMediaTime()  // FPS start
-            t4 = 0.0  // FPS dt smoothed
+      case 1:
+        self.labelName.text = "YOLOv8s"
+        if #available(iOS 15.0, *) {
+          mlModel = try! yolov8s_human(configuration: .init()).model
+        } else {
+          // Fallback on earlier versions
        }
-    }
-
-    /// Update thresholds from slider values
-    @IBAction func sliderChanged(_ sender: Any) {
-        self.confidenceThreshold = sliderConf.value
-        self.iouThreshold = sliderIoU.value
-        let conf = Double(round(100 * sliderConf.value)) / 100
-        let iou = Double(round(100 * sliderIoU.value)) / 100
-        self.labelSliderConf.text = String(conf) + " Confidence Threshold"
-        self.labelSliderIoU.text = String(iou) + " IoU Threshold"
-        detector.featureProvider = ThresholdProvider(iouThreshold: iou, confidenceThreshold: conf)
-    }
-
-    @IBAction func taskSegmentControlChanged(_ sender: UISegmentedControl) {
-        save_strings.removeAll()
-        saveDataButton.tintColor = nil
-        save_detections = false
-
-        switch sender.selectedSegmentIndex {
-        case 0:
-            if self.task != .detect {
-                self.trackingLabel.isHidden = true
-                self.trackingSwitch.isHidden = true
-                self.task = .detect
-                self.setModel()
-            }
-        case 1:
-            if self.task != .human {
-                self.task = .human
-                for i in 0..<
-    // Return hard drive space (GB)
-    func freeSpace() -> Double {
-        let fileURL = URL(fileURLWithPath: NSHomeDirectory() as String)
+  }
+
+  func measureFPS() {
+
+  }
+
+  // Save text file
+  func saveText(text: String, file: String = "saved.txt") {
+    if let dir = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first {
+      let fileURL = dir.appendingPathComponent(file)
+
+      // Writing
+      do {  // Append to file if it exists
+        let fileHandle = try FileHandle(forWritingTo: fileURL)
+        fileHandle.seekToEndOfFile()
+        fileHandle.write(text.data(using: .utf8)!)
+        fileHandle.closeFile()
+      } catch {  // Create new file and write
        do {
-            let values = try fileURL.resourceValues(forKeys: [.volumeAvailableCapacityForImportantUsageKey])
-            return Double(values.volumeAvailableCapacityForImportantUsage!) / 1E9  // Bytes to GB
+          try text.write(to: fileURL, atomically: false, encoding: .utf8)
        } catch {
-            print("Error retrieving storage capacity: \(error.localizedDescription)")
+          print("no file written")
        }
-        return 0
+      }
+
+      // Reading
+      // do {let text2 = try String(contentsOf: fileURL, encoding: .utf8)} catch {/* error handling here */}
    }
-    }
-
-    // Return RAM usage (GB)
-    func memoryUsage() -> Double {
-        var taskInfo = mach_task_basic_info()
-        var count = mach_msg_type_number_t(MemoryLayout<mach_task_basic_info>.size) / 4
-        let kerr: kern_return_t = withUnsafeMutablePointer(to: &taskInfo) {
-            $0.withMemoryRebound(to: integer_t.self, capacity: 1) {
-                task_info(mach_task_self_, task_flavor_t(MACH_TASK_BASIC_INFO), $0, &count)
-            }
-        }
-        if kerr == KERN_SUCCESS {
-            return Double(taskInfo.resident_size) / 1E9  // Bytes to GB
-        } else {
-            return 0
-        }
-    }
-
-    func show(predictions: [VNRecognizedObjectObservation], persons: [Person]) {
-        let width = videoPreview.bounds.width
-        let height = videoPreview.bounds.height
-        var str = ""
-
-        var ratio: CGFloat = 1.0
-
-        if videoCapture.captureSession.sessionPreset == .photo {
-            ratio = (height / width) / (4.0 / 3.0)
-        } else {
-            ratio = (height / width) / (16.0 / 9.0)
-        }
-
-        let date = Date()
-        let calendar = Calendar.current
-        let hour = calendar.component(.hour, from: date)
-        let minutes = calendar.component(.minute, from: date)
-        let seconds = calendar.component(.second, from: date)
-        let nanoseconds = calendar.component(.nanosecond, from: date)
-        let sec_day = Double(hour) * 3600.0 + Double(minutes) * 60.0 + Double(seconds) + Double(nanoseconds) / 1E9
-
-        var resultCount = 0
-
+  }
+
+  // Save image file
+  func saveImage() {
+    let dir = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first
+    let fileURL = dir!.appendingPathComponent("saved.jpg")
+    let image = UIImage(named: "ultralytics_yolo_logotype.png")
+    FileManager.default.createFile(
+      atPath: fileURL.path, contents: image!.jpegData(compressionQuality: 0.5), attributes: nil)
+  }
+
+  // Return hard drive space (GB)
+  func freeSpace() -> Double {
+    let fileURL = URL(fileURLWithPath: NSHomeDirectory() as String)
+    do {
+      let values = try fileURL.resourceValues(forKeys: [
+        .volumeAvailableCapacityForImportantUsageKey
+      ])
+      return Double(values.volumeAvailableCapacityForImportantUsage!) / 1E9  // Bytes to GB
+    } catch {
+      print("Error retrieving storage capacity: \(error.localizedDescription)")
+    }
+    return 0
+  }
+
+  // Return RAM usage (GB)
+  func memoryUsage() -> Double {
+    var taskInfo = mach_task_basic_info()
+    var count = mach_msg_type_number_t(MemoryLayout<mach_task_basic_info>.size) / 4
+    let kerr: kern_return_t = withUnsafeMutablePointer(to: &taskInfo) {
+      $0.withMemoryRebound(to: integer_t.self, capacity: 1) {
+        task_info(mach_task_self_, task_flavor_t(MACH_TASK_BASIC_INFO), $0, &count)
+      }
+    }
+    if kerr == KERN_SUCCESS {
+      return Double(taskInfo.resident_size) / 1E9  // Bytes to GB
+    } else {
+      return 0
+    }
+  }
+
+  func show(predictions: [VNRecognizedObjectObservation], persons: [Person]) {
+    let width = videoPreview.bounds.width
+    let height = videoPreview.bounds.height
+    var str = ""
+
+    var ratio: CGFloat = 1.0
+
+    if videoCapture.captureSession.sessionPreset == .photo {
+      ratio = (height / width) / (4.0 / 3.0)
+    } else {
+      ratio = (height / width) / (16.0 / 9.0)
    }
+
+    let date = Date()
+    let calendar = Calendar.current
+    let hour = calendar.component(.hour, from: date)
+    let minutes = calendar.component(.minute, from: date)
+    let seconds = calendar.component(.second, from: date)
+    let nanoseconds =
calendar.component(.nanosecond, from: date) + let sec_day = + Double(hour) * 3600.0 + Double(minutes) * 60.0 + Double(seconds) + Double(nanoseconds) / 1E9 + + var resultCount = 0 + + switch task { + case .detect: + resultCount = predictions.count + case .human: + resultCount = persons.count + } + self.labelSlider.text = String(resultCount) + " items (max " + String(Int(slider.value)) + ")" + for i in 0..= 1 { - let offset = (1 - ratio) * (0.5 - displayRect.minX) - if task == .detect { - let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: offset, y: -1) - displayRect = displayRect.applying(transform) - } else { - let transform = CGAffineTransform(translationX: offset, y: 0) - displayRect = displayRect.applying(transform) - } - displayRect.size.width *= ratio - } else { - if task == .detect { - let offset = (ratio - 1) * (0.5 - displayRect.maxY) - - let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: offset - 1) - displayRect = displayRect.applying(transform) - } else { - let offset = (ratio - 1) * (0.5 - displayRect.minY) - let transform = CGAffineTransform(translationX: 0, y: offset) - displayRect = displayRect.applying(transform) - } - ratio = (height / width) / (3.0 / 4.0) - displayRect.size.height /= ratio - } - displayRect = VNImageRectForNormalizedRect(displayRect, Int(width), Int(height)) - - boundingBoxViews[i].show(frame: displayRect, label: label, color: boxColor, alpha: alpha, innerTexts: innerTexts) - - if developerMode { - if save_detections { - saveQueue.async { [self] in - guard save_detections else {return} - var str = "" - switch task { - case .detect: - str += String(format: "%.3f,%.3f,%.3f,%@, %.2f,%.3f,%.3f,%.3f,%.3f\n", - sec_day, freeSpace(), UIDevice.current.batteryLevel, bestClass, confidence, - rect.origin.x, rect.origin.y, rect.size.width, rect.size.height) - case .human: - let person = persons[i] - var id = "" - if person.index == -1 { - id = "-" - } else { - id = String(person.index) - } - str += String(format: "%.3f,%.3f,%.3f,%@, %.2f,%.3f,%.3f,%.3f,%.3f,%.2f,%.2f,%d,%@,%.2f,%@,%.2f\n", - sec_day, freeSpace(), UIDevice.current.batteryLevel, id, confidence, - rect.origin.x, rect.origin.y, rect.size.width, rect.size.height, person.weight, person.height, person.age, person.gender, person.genderConfidence, person.race, person.raceConfidence) - } - - save_strings.append(str) - } - } - } - - } else { - boundingBoxViews[i].hide() - } + var displayRect = rect + switch UIDevice.current.orientation { + case .portraitUpsideDown: + displayRect = CGRect( + x: 1.0 - rect.origin.x - rect.width, + y: 1.0 - rect.origin.y - rect.height, + width: rect.width, + height: rect.height) + case .landscapeLeft: + displayRect = CGRect( + x: rect.origin.x, + y: rect.origin.y, + width: rect.width, + height: rect.height) + case .landscapeRight: + displayRect = CGRect( + x: rect.origin.x, + y: rect.origin.y, + width: rect.width, + height: rect.height) + case .unknown: + print("The device orientation is unknown, the predictions may be affected") + fallthrough + default: break } - } + if ratio >= 1 { + let offset = (1 - ratio) * (0.5 - displayRect.minX) + if task == .detect { + let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: offset, y: -1) + displayRect = displayRect.applying(transform) + } else { + let transform = CGAffineTransform(translationX: offset, y: 0) + displayRect = displayRect.applying(transform) + } + displayRect.size.width *= ratio + } else { + if task == .detect { + let offset = (ratio - 1) * (0.5 - displayRect.maxY) - // 
Pinch to Zoom Start --------------------------------------------------------------------------------------------- - let minimumZoom: CGFloat = 1.0 - let maximumZoom: CGFloat = 10.0 - var lastZoomFactor: CGFloat = 1.0 - - @IBAction func pinch(_ pinch: UIPinchGestureRecognizer) { - let device = videoCapture.captureDevice - - // Return zoom value between the minimum and maximum zoom values - func minMaxZoom(_ factor: CGFloat) -> CGFloat { - return min(min(max(factor, minimumZoom), maximumZoom), device.activeFormat.videoMaxZoomFactor) + let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: offset - 1) + displayRect = displayRect.applying(transform) + } else { + let offset = (ratio - 1) * (0.5 - displayRect.minY) + let transform = CGAffineTransform(translationX: 0, y: offset) + displayRect = displayRect.applying(transform) + } + ratio = (height / width) / (3.0 / 4.0) + displayRect.size.height /= ratio } - - func update(scale factor: CGFloat) { - do { - try device.lockForConfiguration() - defer { - device.unlockForConfiguration() + displayRect = VNImageRectForNormalizedRect(displayRect, Int(width), Int(height)) + + boundingBoxViews[i].show( + frame: displayRect, label: label, color: boxColor, alpha: alpha, innerTexts: innerTexts) + + if developerMode { + if save_detections { + saveQueue.async { [self] in + guard save_detections else { return } + var str = "" + switch task { + case .detect: + str += String( + format: "%.3f,%.3f,%.3f,%@, %.2f,%.3f,%.3f,%.3f,%.3f\n", + sec_day, freeSpace(), UIDevice.current.batteryLevel, bestClass, confidence, + rect.origin.x, rect.origin.y, rect.size.width, rect.size.height) + case .human: + let person = persons[i] + var id = "" + if person.index == -1 { + id = "-" + } else { + id = String(person.index) } - device.videoZoomFactor = factor - } catch { - print("\(error.localizedDescription)") + str += String( + format: + "%.3f,%.3f,%.3f,%@, %.2f,%.3f,%.3f,%.3f,%.3f,%.2f,%.2f,%d,%@,%.2f,%@,%.2f\n", + sec_day, freeSpace(), UIDevice.current.batteryLevel, id, confidence, + rect.origin.x, rect.origin.y, rect.size.width, rect.size.height, person.weight, + person.height, person.age, person.gender, person.genderConfidence, person.race, + person.raceConfidence) + } + + save_strings.append(str) } + } } - - let newScaleFactor = minMaxZoom(pinch.scale * lastZoomFactor) - switch pinch.state { - case .began: fallthrough - case .changed: - update(scale: newScaleFactor) - self.labelZoom.text = String(format: "%.2fx", newScaleFactor) - self.labelZoom.font = UIFont.preferredFont(forTextStyle: .title2) - case .ended: - lastZoomFactor = minMaxZoom(newScaleFactor) - update(scale: lastZoomFactor) - self.labelZoom.font = UIFont.preferredFont(forTextStyle: .body) - default: break + + } else { + boundingBoxViews[i].hide() + } + } + } + + // Pinch to Zoom Start --------------------------------------------------------------------------------------------- + let minimumZoom: CGFloat = 1.0 + let maximumZoom: CGFloat = 10.0 + var lastZoomFactor: CGFloat = 1.0 + + @IBAction func pinch(_ pinch: UIPinchGestureRecognizer) { + let device = videoCapture.captureDevice + + // Return zoom value between the minimum and maximum zoom values + func minMaxZoom(_ factor: CGFloat) -> CGFloat { + return min(min(max(factor, minimumZoom), maximumZoom), device.activeFormat.videoMaxZoomFactor) + } + + func update(scale factor: CGFloat) { + do { + try device.lockForConfiguration() + defer { + device.unlockForConfiguration() } - } // Pinch to Zoom Start + device.videoZoomFactor = factor + } catch { 
+ print("\(error.localizedDescription)") + } + } + let newScaleFactor = minMaxZoom(pinch.scale * lastZoomFactor) + switch pinch.state { + case .began, .changed: + update(scale: newScaleFactor) + self.labelZoom.text = String(format: "%.2fx", newScaleFactor) + self.labelZoom.font = UIFont.preferredFont(forTextStyle: .title2) + case .ended: + lastZoomFactor = minMaxZoom(newScaleFactor) + update(scale: lastZoomFactor) + self.labelZoom.font = UIFont.preferredFont(forTextStyle: .body) + default: break + } + } // Pinch to Zoom Start - // ------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------ } // ViewController class End extension ViewController: VideoCaptureDelegate { - func videoCapture(_ capture: VideoCapture, didCaptureVideoFrame sampleBuffer: CMSampleBuffer) { - predict(sampleBuffer: sampleBuffer) - } + func videoCapture(_ capture: VideoCapture, didCaptureVideoFrame sampleBuffer: CMSampleBuffer) { + predict(sampleBuffer: sampleBuffer) + } } // Programmatically save image extension ViewController: AVCapturePhotoCaptureDelegate { - func photoOutput(_ output: AVCapturePhotoOutput, didFinishProcessingPhoto photo: AVCapturePhoto, error: Error?) { - if let error = error { - print("error occurred : \(error.localizedDescription)") - } - if let dataImage = photo.fileDataRepresentation() { - let dataProvider = CGDataProvider(data: dataImage as CFData) - let cgImageRef: CGImage! = CGImage(jpegDataProviderSource: dataProvider!, decode: nil, shouldInterpolate: true, intent: .defaultIntent) - var orientation = CGImagePropertyOrientation.right - switch UIDevice.current.orientation { - case .landscapeLeft: - orientation = .up - case .landscapeRight: - orientation = .down - default: - break - } - var image = UIImage(cgImage: cgImageRef, scale: 0.5, orientation: .right) - if let orientedCIImage = CIImage(image: image)?.oriented(orientation), - let cgImage = CIContext().createCGImage(orientedCIImage, from: orientedCIImage.extent) { - image = UIImage(cgImage: cgImage) - - } - let imageView = UIImageView(image: image) - imageView.contentMode = .scaleAspectFill - imageView.frame = videoPreview.frame - let imageLayer = imageView.layer - var sublayers = videoPreview.layer.sublayers ?? [] - let insertIndex = max(sublayers.count - 1, 0) - videoPreview.layer.insertSublayer(imageLayer, above: videoCapture.previewLayer) - - let bounds = UIScreen.main.bounds - UIGraphicsBeginImageContextWithOptions(bounds.size, true, 0.0) - self.View0.drawHierarchy(in: bounds, afterScreenUpdates: true) - let img = UIGraphicsGetImageFromCurrentImageContext() - UIGraphicsEndImageContext() - imageLayer.removeFromSuperlayer() - let activityViewController = UIActivityViewController(activityItems: [img!], applicationActivities: nil) - activityViewController.popoverPresentationController?.sourceView = self.View0 - self.present(activityViewController, animated: true, completion: nil) -// -// // Save to camera roll -// UIImageWriteToSavedPhotosAlbum(img!, nil, nil, nil); - } else { - print("AVCapturePhotoCaptureDelegate Error") - } - } -} - + func photoOutput( + _ output: AVCapturePhotoOutput, didFinishProcessingPhoto photo: AVCapturePhoto, error: Error? + ) { + if let error = error { + print("error occurred : \(error.localizedDescription)") + } + if let dataImage = photo.fileDataRepresentation() { + let dataProvider = CGDataProvider(data: dataImage as CFData) + let cgImageRef: CGImage! 
= CGImage( + jpegDataProviderSource: dataProvider!, decode: nil, shouldInterpolate: true, + intent: .defaultIntent) + var orientation = CGImagePropertyOrientation.right + switch UIDevice.current.orientation { + case .landscapeLeft: + orientation = .up + case .landscapeRight: + orientation = .down + default: + break + } + var image = UIImage(cgImage: cgImageRef, scale: 0.5, orientation: .right) + if let orientedCIImage = CIImage(image: image)?.oriented(orientation), + let cgImage = CIContext().createCGImage(orientedCIImage, from: orientedCIImage.extent) + { + image = UIImage(cgImage: cgImage) + } + let imageView = UIImageView(image: image) + imageView.contentMode = .scaleAspectFill + imageView.frame = videoPreview.frame + let imageLayer = imageView.layer + var sublayers = videoPreview.layer.sublayers ?? [] + let insertIndex = max(sublayers.count - 1, 0) + videoPreview.layer.insertSublayer(imageLayer, above: videoCapture.previewLayer) + let bounds = UIScreen.main.bounds + UIGraphicsBeginImageContextWithOptions(bounds.size, true, 0.0) + self.View0.drawHierarchy(in: bounds, afterScreenUpdates: true) + let img = UIGraphicsGetImageFromCurrentImageContext() + UIGraphicsEndImageContext() + imageLayer.removeFromSuperlayer() + let activityViewController = UIActivityViewController( + activityItems: [img!], applicationActivities: nil) + activityViewController.popoverPresentationController?.sourceView = self.View0 + self.present(activityViewController, animated: true, completion: nil) + // + // // Save to camera roll + // UIImageWriteToSavedPhotosAlbum(img!, nil, nil, nil); + } else { + print("AVCapturePhotoCaptureDelegate Error") + } + } +}
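For reference, sliderChanged above pushes the slider values back into the model via detector.featureProvider = ThresholdProvider(iouThreshold:confidenceThreshold:). Core ML models exported by Ultralytics accept iouThreshold and confidenceThreshold as optional model inputs that the bundled NMS stage reads, and an MLFeatureProvider is the standard way to supply them per request. ThresholdProvider.swift is not shown in this excerpt, so the following is a plausible minimal sketch of such a provider, not the file's actual contents.

import CoreML

/// Minimal sketch of a threshold feature provider (assumed; the real ThresholdProvider.swift is not shown here).
class ThresholdProvider: MLFeatureProvider {
  /// Backing storage for the two optional model inputs.
  var values: [String: MLFeatureValue]

  var featureNames: Set<String> { Set(values.keys) }

  init(iouThreshold: Double = 0.45, confidenceThreshold: Double = 0.25) {
    values = [
      "iouThreshold": MLFeatureValue(double: iouThreshold),
      "confidenceThreshold": MLFeatureValue(double: confidenceThreshold),
    ]
  }

  func featureValue(for featureName: String) -> MLFeatureValue? {
    values[featureName]
  }
}

Usage mirrors the diff: assigning a fresh provider, as in detector.featureProvider = ThresholdProvider(iouThreshold: 0.45, confidenceThreshold: 0.25), applies the new thresholds to subsequent VNCoreMLRequest evaluations without recompiling the model.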