// Banuba SDK — camera_device.hpp
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Merged in single header MF based camera from Chromium project.
3
4#pragma once
5
6#include <bnb/utils/defs.hpp>
7
8#if BNB_OS_WINDOWS
9
10// clang-format off
11
// C/C++ standard library.
#include <algorithm>
#include <atomic>
#include <cassert>
#include <cmath>
#include <cstdlib>
#include <functional>
#include <iterator>
#include <list>
#include <memory>
#include <mutex>
#include <stddef.h>
#include <thread>
#include <utility>

// Windows / Media Foundation.
#include <Windows.h>
#include <mfapi.h>
#include <mferror.h>
#include <wincodec.h>
#include <wrl.h>
#include "mfcaptureengine.h"

#pragma comment(lib,"Mfplat.lib")
#pragma comment(lib,"Mf.lib")
#pragma comment(lib,"Mfreadwrite.lib")
#pragma comment(lib,"mfuuid.lib")
#pragma comment(lib,"shlwapi.lib")

using Microsoft::WRL::ComPtr;
37
38
// Releases a COM-style interface pointer and resets it to nullptr.
// No-op when *ppT is already null, so it is safe to call repeatedly.
template <class T> void SafeRelease(T** ppT)
{
    if (*ppT)
    {
        (*ppT)->Release();
        *ppT = nullptr;  // nullptr instead of NULL (modern C++ idiom).
    }
}
47
// Pixel formats understood by the capture pipeline. Values mirror
// Chromium's VideoPixelFormat enumeration and must stay stable.
enum VideoPixelFormat {
    PIXEL_FORMAT_UNKNOWN = 0,  // Unknown or unspecified format value.
    PIXEL_FORMAT_I420 = 1,     // 12bpp YUV planar 1x1 Y, 2x2 UV samples, a.k.a. YU12.

    // Note: Chrome does not actually support YVU compositing, so you probably
    // don't actually want to use this. See http://crbug.com/784627.
    PIXEL_FORMAT_YV12 = 2,   // 12bpp YVU planar 1x1 Y, 2x2 VU samples.

    PIXEL_FORMAT_I422 = 3,   // 16bpp YUV planar 1x1 Y, 2x1 UV samples.
    PIXEL_FORMAT_I420A = 4,  // 20bpp YUVA planar 1x1 Y, 2x2 UV, 1x1 A samples.
    PIXEL_FORMAT_I444 = 5,   // 24bpp YUV planar, no subsampling.
    PIXEL_FORMAT_NV12 = 6,   // 12bpp with Y plane followed by a 2x2 interleaved UV plane.
    PIXEL_FORMAT_NV21 = 7,   // 12bpp with Y plane followed by a 2x2 interleaved VU plane.
    PIXEL_FORMAT_UYVY = 8,   // 16bpp interleaved 2x1 U, 1x1 Y, 2x1 V, 1x1 Y samples.
    PIXEL_FORMAT_YUY2 = 9,   // 16bpp interleaved 1x1 Y, 2x1 U, 1x1 Y, 2x1 V samples.
    PIXEL_FORMAT_ARGB = 10,  // 32bpp BGRA (byte-order), 1 plane.
    PIXEL_FORMAT_XRGB = 11,  // 24bpp BGRX (byte-order), 1 plane.
    PIXEL_FORMAT_RGB24 = 12, // 24bpp BGR (byte-order), 1 plane.

    /* PIXEL_FORMAT_RGB32 = 13, Deprecated */
    PIXEL_FORMAT_MJPEG = 14, // MJPEG compressed.
    /* PIXEL_FORMAT_MT21 = 15, Deprecated */

    // The P* in the formats below designates the number of bits per pixel
    // component. I.e. P9 is 9-bits per pixel component, P10 is 10-bits per
    // pixel component, etc.
    PIXEL_FORMAT_YUV420P9 = 16,
    PIXEL_FORMAT_YUV420P10 = 17,
    PIXEL_FORMAT_YUV422P9 = 18,
    PIXEL_FORMAT_YUV422P10 = 19,
    PIXEL_FORMAT_YUV444P9 = 20,
    PIXEL_FORMAT_YUV444P10 = 21,
    PIXEL_FORMAT_YUV420P12 = 22,
    PIXEL_FORMAT_YUV422P12 = 23,
    PIXEL_FORMAT_YUV444P12 = 24,

    /* PIXEL_FORMAT_Y8 = 25, Deprecated */
    PIXEL_FORMAT_Y16 = 26,  // single 16bpp plane.

    PIXEL_FORMAT_ABGR = 27,  // 32bpp RGBA (byte-order), 1 plane.
    PIXEL_FORMAT_XBGR = 28,  // 24bpp RGBX (byte-order), 1 plane.

    PIXEL_FORMAT_P016LE = 29,  // 24bpp NV12, 16 bits per channel.

    PIXEL_FORMAT_XR30 = 30,  // 32bpp BGRX, 10 bits per channel, 2 bits ignored, 1 plane.
    PIXEL_FORMAT_XB30 = 31,  // 32bpp RGBX, 10 bits per channel, 2 bits ignored, 1 plane.

    PIXEL_FORMAT_BGRA = 32,  // 32bpp ARGB (byte-order), 1 plane.

    // Please update UMA histogram enumeration when adding new formats here.
    PIXEL_FORMAT_MAX = PIXEL_FORMAT_BGRA,  // Must always equal the largest entry logged.
};
108
109// This list is ordered by precedence of use.
110static VideoPixelFormat const kSupportedCapturePixelFormats[] = {
111 PIXEL_FORMAT_NV12, PIXEL_FORMAT_I420, PIXEL_FORMAT_YV12,
112 PIXEL_FORMAT_NV21, PIXEL_FORMAT_UYVY, PIXEL_FORMAT_YUY2,
113 PIXEL_FORMAT_RGB24, PIXEL_FORMAT_ARGB, PIXEL_FORMAT_MJPEG,
114};
115
116
namespace gfx {
    /// Minimal stand-in for Chromium's gfx::Size: an integer width/height pair.
    struct Size {
        int width_ = 0;
        int height_ = 0;

        /// Read accessors mirroring the Chromium gfx::Size API.
        int width() const { return width_; }
        int height() const { return height_; }

        /// Overwrites both dimensions at once.
        void SetWidthAndHeight(int w, int h) {
            width_ = w;
            height_ = h;
        }
    };
} // namespace gfx
129
130
131
// Fully describes one capture configuration: resolution, frame rate and
// pixel format. Mirrors Chromium's media::VideoCaptureFormat.
struct VideoCaptureFormat {
    gfx::Size frame_size;       // Capture resolution in pixels.
    float frame_rate = 0;       // Frames per second; 0 means unknown/unset.
    VideoPixelFormat pixel_format = PIXEL_FORMAT_UNKNOWN;
};
137
138HRESULT CreateCaptureEngine(IMFCaptureEngine** engine) {
139 ComPtr<IMFCaptureEngineClassFactory> capture_engine_class_factory;
140 HRESULT hr = CoCreateInstance(CLSID_MFCaptureEngineClassFactory, nullptr,
141 CLSCTX_INPROC_SERVER,
142 IID_PPV_ARGS(&capture_engine_class_factory));
143 if (FAILED(hr))
144 return hr;
145
146 return capture_engine_class_factory->CreateInstance(CLSID_MFCaptureEngine,
147 IID_PPV_ARGS(engine));
148}
149
150HRESULT CopyAttribute(IMFAttributes* source_attributes,
151 IMFAttributes* destination_attributes,
152 const GUID& key) {
153 PROPVARIANT var;
154 PropVariantInit(&var);
155 HRESULT hr = source_attributes->GetItem(key, &var);
156 if (FAILED(hr))
157 return hr;
158
159 hr = destination_attributes->SetItem(key, var);
160 PropVariantClear(&var);
161 return hr;
162}
163
164HRESULT CreateVideoDeviceSource(IMFMediaSource** ppSource, size_t index)
165{
166 UINT32 count = 0;
167
168 *ppSource = NULL;
169
170 IMFMediaSource* pSource = NULL;
171 IMFAttributes* pAttributes = NULL;
172 IMFActivate** ppDevices = NULL;
173
174 // Create an attribute store to specify the enumeration parameters.
175 HRESULT hr = MFCreateAttributes(&pAttributes, 1);
176 if (FAILED(hr))
177 {
178 goto done;
179 }
180
181 // Source type: video capture devices
182 hr = pAttributes->SetGUID(
183 MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE,
184 MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_GUID
185 );
186 if (FAILED(hr))
187 {
188 goto done;
189 }
190
191 // Enumerate devices.
192 hr = MFEnumDeviceSources(pAttributes, &ppDevices, &count);
193 if (FAILED(hr))
194 {
195 goto done;
196 }
197
198 if (count == 0 || index >= count)
199 {
200 hr = E_FAIL;
201 goto done;
202 }
203
204 // Create the media source object.
205 hr = ppDevices[index]->ActivateObject(IID_PPV_ARGS(&pSource));
206 if (FAILED(hr))
207 {
208 goto done;
209 }
210
211 *ppSource = pSource;
212 (*ppSource)->AddRef();
213
214done:
215 SafeRelease(&pAttributes);
216
217 for (DWORD i = 0; i < count; i++)
218 {
219 SafeRelease(&ppDevices[i]);
220 }
221 CoTaskMemFree(ppDevices);
222 SafeRelease(&pSource);
223 return hr;
224}
225
226
227bool GetFrameSizeFromMediaType(IMFMediaType* type, gfx::Size* frame_size) {
228 UINT32 width32, height32;
229 if (FAILED(MFGetAttributeSize(type, MF_MT_FRAME_SIZE, &width32, &height32)))
230 return false;
231 frame_size->SetWidthAndHeight(width32, height32);
232 return true;
233}
234
// Maps a camera (source) media subtype to the sink subtype requested from
// IMFCaptureEngine and the pixel format ultimately seen by the client.
struct MediaFormatConfiguration {
    GUID mf_source_media_subtype;   // Format produced by the camera.
    GUID mf_sink_media_subtype;     // Format requested from the sink.
    VideoPixelFormat pixel_format;  // Client-side pixel format.
};
240
241bool GetMediaFormatConfigurationFromMFSourceMediaSubtype(
242 const GUID& mf_source_media_subtype,
243 MediaFormatConfiguration* media_format_configuration) {
244 static const MediaFormatConfiguration kMediaFormatConfigurationMap[] = {
245 // IMFCaptureEngine inevitably performs the video frame decoding itself.
246 // This means that the sink must always be set to an uncompressed video
247 // format.
248
249 // Since chromium uses I420 at the other end of the pipe, MF known video
250 // output formats are always set to I420.
251 {MFVideoFormat_I420, MFVideoFormat_I420, PIXEL_FORMAT_I420},
252 {MFVideoFormat_YUY2, MFVideoFormat_I420, PIXEL_FORMAT_I420},
253 {MFVideoFormat_UYVY, MFVideoFormat_I420, PIXEL_FORMAT_I420},
254 {MFVideoFormat_RGB24, MFVideoFormat_I420, PIXEL_FORMAT_I420},
255 {MFVideoFormat_RGB32, MFVideoFormat_I420, PIXEL_FORMAT_I420},
256 {MFVideoFormat_ARGB32, MFVideoFormat_I420, PIXEL_FORMAT_I420},
257 {MFVideoFormat_MJPG, MFVideoFormat_I420, PIXEL_FORMAT_I420},
258 {MFVideoFormat_NV12, MFVideoFormat_NV12, PIXEL_FORMAT_NV12},
259 {MFVideoFormat_YV12, MFVideoFormat_I420, PIXEL_FORMAT_I420},
260
261 /*
262 // Depth cameras use 16-bit uncompressed video formats.
263 // We ask IMFCaptureEngine to let the frame pass through, without
264 // transcoding, since transcoding would lead to precision loss.
265 {kMediaSubTypeY16, kMediaSubTypeY16, PIXEL_FORMAT_Y16},
266 {kMediaSubTypeZ16, kMediaSubTypeZ16, PIXEL_FORMAT_Y16},
267 {kMediaSubTypeINVZ, kMediaSubTypeINVZ, PIXEL_FORMAT_Y16},
268 {MFVideoFormat_D16, MFVideoFormat_D16, PIXEL_FORMAT_Y16},
269 */
270
271 // Photo type
272 {GUID_ContainerFormatJpeg, GUID_ContainerFormatJpeg, PIXEL_FORMAT_MJPEG} };
273
274 for (const auto& kMediaFormatConfiguration : kMediaFormatConfigurationMap) {
275 if (IsEqualGUID(kMediaFormatConfiguration.mf_source_media_subtype,
276 mf_source_media_subtype)) {
277 *media_format_configuration = kMediaFormatConfiguration;
278 return true;
279 }
280 }
281
282 return false;
283}
284
285
286bool GetPixelFormatFromMFSourceMediaSubtype(const GUID& mf_source_media_subtype, VideoPixelFormat* pixel_format) {
287 MediaFormatConfiguration media_format_configuration;
288 if (!GetMediaFormatConfigurationFromMFSourceMediaSubtype(
289 mf_source_media_subtype, &media_format_configuration))
290 return false;
291 *pixel_format = media_format_configuration.pixel_format;
292 return true;
293}
294
295// Calculate sink subtype based on source subtype. |passthrough| is set when
296// sink and source are the same and means that there should be no transcoding
297// done by IMFCaptureEngine.
298HRESULT GetMFSinkMediaSubtype(IMFMediaType* source_media_type,
299 GUID* mf_sink_media_subtype,
300 bool* passthrough) {
301 GUID source_subtype;
302 HRESULT hr = source_media_type->GetGUID(MF_MT_SUBTYPE, &source_subtype);
303 if (FAILED(hr))
304 return hr;
305 MediaFormatConfiguration media_format_configuration;
306 if (!GetMediaFormatConfigurationFromMFSourceMediaSubtype(
307 source_subtype, &media_format_configuration))
308 return E_FAIL;
309 *mf_sink_media_subtype = media_format_configuration.mf_sink_media_subtype;
310 *passthrough =
311 IsEqualGUID(media_format_configuration.mf_sink_media_subtype, source_subtype);
312 return S_OK;
313}
314
315HRESULT ConvertToVideoSinkMediaType(IMFMediaType* source_media_type,
316 IMFMediaType* sink_media_type, UINT32 frame_rate) {
317 HRESULT hr = sink_media_type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
318 if (FAILED(hr))
319 return hr;
320
321 bool passthrough = false;
322 GUID mf_sink_media_subtype;
323 hr = GetMFSinkMediaSubtype(source_media_type, &mf_sink_media_subtype,
324 &passthrough);
325 if (FAILED(hr))
326 return hr;
327
328 hr = sink_media_type->SetGUID(MF_MT_SUBTYPE, mf_sink_media_subtype);
329 // Copying attribute values for passthrough mode is redundant, since the
330 // format is kept unchanged, and causes AddStream error MF_E_INVALIDMEDIATYPE.
331 if (FAILED(hr)/* || passthrough*/)
332 return hr;
333
334 hr = CopyAttribute(source_media_type, sink_media_type, MF_MT_FRAME_SIZE);
335 if (FAILED(hr))
336 return hr;
337
338 // set reuqested frame rate
339 hr = MFSetAttributeRatio(sink_media_type, MF_MT_FRAME_RATE, frame_rate, 1);
340 if (FAILED(hr))
341 return hr;
342 ///
343
344 hr = CopyAttribute(source_media_type, sink_media_type,
345 MF_MT_PIXEL_ASPECT_RATIO);
346 if (FAILED(hr))
347 return hr;
348
349 return CopyAttribute(source_media_type, sink_media_type,
350 MF_MT_INTERLACE_MODE);
351}
352
353
354struct CapabilityWin {
355 CapabilityWin(int media_type_index, const VideoCaptureFormat& format)
356 : media_type_index(media_type_index),
357 supported_format(format),
358 info_header(),
359 stream_index(0) {}
360
361 // Used by VideoCaptureDeviceWin.
362 CapabilityWin(int media_type_index,
363 const VideoCaptureFormat& format,
364 const BITMAPINFOHEADER& info_header)
365 : media_type_index(media_type_index),
366 supported_format(format),
367 info_header(info_header),
368 stream_index(0) {}
369
370 // Used by VideoCaptureDeviceMFWin.
371 CapabilityWin(int media_type_index,
372 const VideoCaptureFormat& format,
373 int stream_index)
374 : media_type_index(media_type_index),
375 supported_format(format),
376 info_header(),
377 stream_index(stream_index) {}
378
379 const int media_type_index;
380 const VideoCaptureFormat supported_format;
381
382 // |info_header| is only valid if DirectShow is used.
383 const BITMAPINFOHEADER info_header;
384
385 // |stream_index| is only valid if MediaFoundation is used.
386 const int stream_index;
387};
388
389typedef std::list<CapabilityWin> CapabilityList;
390
391
392// static
393bool ComparePixelFormatPreference(
394 const VideoPixelFormat& lhs,
395 const VideoPixelFormat& rhs) {
396 auto* format_lhs = std::find(
397 kSupportedCapturePixelFormats,
398 kSupportedCapturePixelFormats + std::size(kSupportedCapturePixelFormats),
399 lhs);
400 auto* format_rhs = std::find(
401 kSupportedCapturePixelFormats,
402 kSupportedCapturePixelFormats + std::size(kSupportedCapturePixelFormats),
403 rhs);
404 return format_lhs < format_rhs;
405}
406
407
408// Compares the priority of the capture formats. Returns true if |lhs| is the
409// preferred capture format in comparison with |rhs|. Returns false otherwise.
410bool CompareCapability(const VideoCaptureFormat& requested,
411 const VideoCaptureFormat& lhs,
412 const VideoCaptureFormat& rhs) {
413 // When 16-bit format is requested and available, avoid other formats.
414 // If both lhs and rhs are 16-bit, we still need to compare them based on
415 // height, width and frame rate.
416 const bool use_requested =
417 (requested.pixel_format == PIXEL_FORMAT_Y16);
418 if (use_requested && lhs.pixel_format != rhs.pixel_format) {
419 if (lhs.pixel_format == requested.pixel_format)
420 return true;
421 if (rhs.pixel_format == requested.pixel_format)
422 return false;
423 }
424 const int diff_height_lhs =
425 std::abs(lhs.frame_size.height() - requested.frame_size.height());
426 const int diff_height_rhs =
427 std::abs(rhs.frame_size.height() - requested.frame_size.height());
428 if (diff_height_lhs != diff_height_rhs)
429 return diff_height_lhs < diff_height_rhs;
430
431 const int diff_width_lhs =
432 std::abs(lhs.frame_size.width() - requested.frame_size.width());
433 const int diff_width_rhs =
434 std::abs(rhs.frame_size.width() - requested.frame_size.width());
435 if (diff_width_lhs != diff_width_rhs)
436 return diff_width_lhs < diff_width_rhs;
437
438 const float diff_fps_lhs = std::fabs(lhs.frame_rate - requested.frame_rate);
439 const float diff_fps_rhs = std::fabs(rhs.frame_rate - requested.frame_rate);
440 if (diff_fps_lhs != diff_fps_rhs)
441 return diff_fps_lhs < diff_fps_rhs;
442
443 return ComparePixelFormatPreference(lhs.pixel_format,
444 rhs.pixel_format);
445}
446
447const CapabilityWin& GetBestMatchedCapability(
448 const VideoCaptureFormat& requested,
449 const CapabilityList& capabilities) {
450
451 const CapabilityWin* best_match = &(*capabilities.begin());
452 for (const CapabilityWin& capability : capabilities) {
453 if (CompareCapability(requested, capability.supported_format,
454 best_match->supported_format)) {
455 best_match = &capability;
456 }
457 }
458 return *best_match;
459}
460
461
462
463
// Locks the given buffer using the fastest supported method when constructed,
// and automatically unlocks the buffer when destroyed.
class ScopedBufferLock {
public:
    explicit ScopedBufferLock(ComPtr<IMFMediaBuffer> buffer)
        : buffer_(std::move(buffer)) {
        // Buffers that do not expose IMF2DBuffer can only use the slow path.
        if (FAILED(buffer_.As(&buffer_2d_))) {
            LockSlow();
            return;
        }
        // Try lock methods from fastest to slowest: Lock2DSize(), then Lock2D(),
        // then finally LockSlow().
        if (Lock2DSize() || Lock2D()) {
            if (IsContiguous())
                return;
            buffer_2d_->Unlock2D();
        }
        // Fall back to LockSlow() if 2D buffer was unsupported or noncontiguous.
        buffer_2d_ = nullptr;
        LockSlow();
    }

    // Returns whether |buffer_2d_| is contiguous with positive pitch, i.e., the
    // buffer format that the surrounding code expects.
    // |length_| may already be set by Lock2DSize(); otherwise it is fetched
    // here via GetContiguousLength().
    bool IsContiguous() {
        BOOL is_contiguous;
        return pitch_ > 0 &&
               SUCCEEDED(buffer_2d_->IsContiguousFormat(&is_contiguous)) &&
               is_contiguous &&
               (length_ || SUCCEEDED(buffer_2d_->GetContiguousLength(&length_)));
    }

    // Fastest path: IMF2DBuffer2::Lock2DSize yields data, pitch and length in
    // one call. Returns false when the buffer does not implement IMF2DBuffer2.
    bool Lock2DSize() {
        ComPtr<IMF2DBuffer2> buffer_2d_2;
        if (FAILED(buffer_.As(&buffer_2d_2)))
            return false;
        BYTE* data_start;
        return SUCCEEDED(buffer_2d_2->Lock2DSize(MF2DBuffer_LockFlags_Read, &data_,
                                                 &pitch_, &data_start, &length_));
    }

    // Middle path: IMF2DBuffer::Lock2D (no length; IsContiguous() fetches it).
    bool Lock2D() { return SUCCEEDED(buffer_2d_->Lock2D(&data_, &pitch_)); }

    // Slowest path: plain IMFMediaBuffer::Lock.
    // NOTE(review): the HRESULT is ignored — on failure data_ stays null, so
    // callers must check data() before use.
    void LockSlow() {
        DWORD max_length = 0;
        buffer_->Lock(&data_, &max_length, &length_);
    }

    // Unlocks through whichever interface the constructor locked with:
    // buffer_2d_ is non-null only while a contiguous 2D lock is held.
    ~ScopedBufferLock() {
        if (buffer_2d_)
            buffer_2d_->Unlock2D();
        else
            buffer_->Unlock();
    }

    // Non-copyable: the lock is tied to this object's lifetime (RAII).
    ScopedBufferLock(const ScopedBufferLock&) = delete;
    ScopedBufferLock& operator=(const ScopedBufferLock&) = delete;

    BYTE* data() const { return data_; }    // Null if locking failed.
    DWORD length() const { return length_; }
    LONG pitch() const { return pitch_; }

private:
    ComPtr<IMFMediaBuffer> buffer_;
    ComPtr<IMF2DBuffer> buffer_2d_;  // Set only while a 2D lock is held.
    BYTE* data_ = nullptr;
    DWORD length_ = 0;
    LONG pitch_ = 0;
};
533
534template <typename T>
535class MFVideoCallback final
536 :
537 public IMFCaptureEngineOnSampleCallback,
538 public IMFCaptureEngineOnEventCallback {
539public:
540 MFVideoCallback(T* observer) : observer_(observer) {}
541
542 IFACEMETHODIMP QueryInterface(REFIID riid, void** object) override {
543 HRESULT hr = E_NOINTERFACE;
544 if (IsEqualGUID(riid, IID_IUnknown)) {
545 *object = this;
546 hr = S_OK;
547 }
548 else if (IsEqualGUID(riid, IID_IMFCaptureEngineOnSampleCallback)) {
549 *object = static_cast<IMFCaptureEngineOnSampleCallback*>(this);
550 hr = S_OK;
551 }
552 else if (IsEqualGUID(riid, IID_IMFCaptureEngineOnEventCallback)) {
553 *object = static_cast<IMFCaptureEngineOnEventCallback*>(this);
554 hr = S_OK;
555 }
556 if (SUCCEEDED(hr))
557 AddRef();
558
559 return hr;
560 }
561
562 IFACEMETHODIMP_(ULONG) AddRef() override {
563 return InterlockedIncrement(&m_cRef);
564 }
565
566 IFACEMETHODIMP_(ULONG) Release() override {
567 ULONG l = InterlockedDecrement(&m_cRef);
568 if (0 == l) delete this;
569 return l;
570 }
571
572 IFACEMETHODIMP OnEvent(IMFMediaEvent* media_event) override {
573 if (!observer_) {
574 return S_OK;
575 }
576
577 GUID capture_event_guid = GUID_NULL;
578 {
579 std::lock_guard<std::mutex> guard(lock_);
580 observer_->OnEvent(media_event);
581 if (HRESULT hr = media_event->GetExtendedType(&capture_event_guid); FAILED(hr)) {
582 return hr;
583 }
584 }
585 return S_OK;
586 }
587
588 IFACEMETHODIMP OnSample(IMFSample* sample) override {
589 std::lock_guard<std::mutex> guard(lock_);
590
591 if (!observer_) {
592 return S_OK;
593 }
594 if (!sample) {
595 observer_->OnFrameDropped();
596 return S_OK;
597 }
598 /*
599 base::TimeTicks reference_time(base::TimeTicks::Now());
600 LONGLONG raw_time_stamp = 0;
601 sample->GetSampleTime(&raw_time_stamp);
602 base::TimeDelta timestamp =
603 base::TimeDelta::FromMicroseconds(raw_time_stamp / 10);
604 */
605
606 DWORD count = 0;
607 sample->GetBufferCount(&count);
608
609 for (DWORD i = 0; i < count; ++i) {
610 ComPtr<IMFMediaBuffer> buffer;
611 sample->GetBufferByIndex(i, &buffer);
612 if (buffer) {
613 auto locked_buffer = std::make_shared<ScopedBufferLock>(buffer);
614 if (locked_buffer->data()) {
615 observer_->OnIncomingCapturedData(std::move(locked_buffer));
616 }
617 else {
618 observer_->OnFrameDropped();
619 }
620 }
621 else {
622 observer_->OnFrameDropped();
623 }
624 }
625 return S_OK;
626 }
627
628 void Shutdown() {
629 std::lock_guard<std::mutex> guard(lock_);
630 observer_ = nullptr;
631 }
632
633private:
634 ULONG m_cRef = 0;
635 ~MFVideoCallback() {}
636 std::mutex lock_;
637 T* observer_;
638};
639
640class VideoCaptureDeviceMFWin {
641public:
642 using callback_t = std::function<void(std::shared_ptr<ScopedBufferLock> lock)>;
643 VideoCaptureDeviceMFWin(ComPtr<IMFMediaSource>& source)
644 :source_(source) {
645 camera_stop_ = ::CreateEvent(NULL, TRUE, FALSE, "CameraStop");
646 capture_error_ = ::CreateEvent(NULL, TRUE, FALSE, "CaptureError");
647 capture_initialize_ = ::CreateEvent(NULL, TRUE, FALSE, "CaptureInitialize");
648 }
649
650 void SetCallback(callback_t callback) {
651 std::lock_guard<std::mutex> lock(mutex_);
652 callback_ = callback;
653 }
654
655 ~VideoCaptureDeviceMFWin() {
656 ::CloseHandle(camera_stop_);
657 ::CloseHandle(capture_error_);
658 ::CloseHandle(capture_initialize_);
659 }
660
661 void OnEvent(IMFMediaEvent* media_event) {
662 HRESULT hr;
663 GUID capture_event_guid = GUID_NULL;
664
665 media_event->GetStatus(&hr);
666 media_event->GetExtendedType(&capture_event_guid);
667 if (IsEqualGUID(capture_event_guid, MF_CAPTURE_ENGINE_ERROR) || FAILED(hr)) {
668 ::SetEvent(capture_error_);
669 }
670 else if (IsEqualGUID(capture_event_guid, MF_CAPTURE_ENGINE_INITIALIZED)) {
671 ::SetEvent(capture_initialize_);
672 }
673 else if (IsEqualGUID(capture_event_guid, MF_CAPTURE_ENGINE_PREVIEW_STOPPED)) {
674 ::SetEvent(camera_stop_);
675 }
676 }
677
678 void OnFrameDropped() {
679
680 }
681
682
683 void OnIncomingCapturedData(std::shared_ptr<ScopedBufferLock> buffer_lock) {
684 std::lock_guard<std::mutex> lock(mutex_);
685 if (callback_) {
686 callback_(std::move(buffer_lock));
687 }
688 }
689
690 HRESULT Init() {
691 HRESULT hr = S_OK;
692
693 if (!engine_) {
694 hr = CreateCaptureEngine(&engine_);
695 if (FAILED(hr)) {
696 assert(0);
697 return hr;
698 }
699 }
700
701 ComPtr<IMFAttributes> attributes;
702 hr = MFCreateAttributes(&attributes, 1);
703 if (FAILED(hr)) {
704 assert(0);
705 return hr;
706 }
707
708 hr = attributes->SetUINT32(MF_CAPTURE_ENGINE_USE_VIDEO_DEVICE_ONLY, TRUE);
709 if (FAILED(hr)) {
710 assert(0);
711 return hr;
712 }
713
714
715 video_callback_ = new MFVideoCallback<VideoCaptureDeviceMFWin>(this);
716
717
718 hr = engine_->Initialize(video_callback_.Get(), attributes.Get(), nullptr, source_.Get());
719 if (FAILED(hr)) {
720 assert(0);
721 return hr;
722 }
723
724 HANDLE events[] = { capture_initialize_, capture_error_ };
725
726 DWORD wait_result = ::WaitForMultipleObjects(2, events, FALSE, INFINITE);
727 switch (wait_result) {
728 case WAIT_OBJECT_0:
729 break;
730 case WAIT_FAILED:
731 hr = HRESULT_FROM_WIN32(::GetLastError());
732 assert(0);
733 break;
734 default:
735 hr = E_UNEXPECTED;
736 assert(0);
737 break;
738 }
739 return hr;
740 }
741
742 bool GetFrameRateFromMediaType(IMFMediaType* type, float* frame_rate) {
743 UINT32 numerator, denominator;
744 if (FAILED(MFGetAttributeRatio(type, MF_MT_FRAME_RATE, &numerator,
745 &denominator)) ||
746 !denominator) {
747 return false;
748 }
749 *frame_rate = static_cast<float>(numerator) / denominator;
750 return true;
751 }
752
753 bool GetFormatFromSourceMediaType(IMFMediaType* source_media_type, bool photo, VideoCaptureFormat* format) {
754 GUID major_type_guid;
755 if (FAILED(source_media_type->GetGUID(MF_MT_MAJOR_TYPE, &major_type_guid)) ||
756 (!IsEqualGUID(major_type_guid, MFMediaType_Image) &&
757 (photo ||
758 !GetFrameRateFromMediaType(source_media_type, &format->frame_rate)))) {
759 return false;
760 }
761
762 GUID sub_type_guid;
763 if (FAILED(source_media_type->GetGUID(MF_MT_SUBTYPE, &sub_type_guid)) ||
764 !GetFrameSizeFromMediaType(source_media_type, &format->frame_size) ||
765 !GetPixelFormatFromMFSourceMediaSubtype(
766 sub_type_guid, &format->pixel_format)) {
767 return false;
768 }
769
770 return true;
771 }
772
773
774 HRESULT FillCapabilities(IMFCaptureSource* source, CapabilityList* capabilities) {
775 DWORD stream_count = 0;
776 HRESULT hr = source->GetDeviceStreamCount(&stream_count);
777 if (FAILED(hr)) { assert(0); }
778
779 for (DWORD stream_index = 0; stream_index < stream_count; stream_index++) {
780 MF_CAPTURE_ENGINE_STREAM_CATEGORY stream_category;
781 hr = source->GetDeviceStreamCategory(stream_index, &stream_category);
782 if (FAILED(hr)) { assert(0); }
783 if (stream_category != MF_CAPTURE_ENGINE_STREAM_CATEGORY_VIDEO_PREVIEW &&
784 stream_category != MF_CAPTURE_ENGINE_STREAM_CATEGORY_VIDEO_CAPTURE) {
785 continue;
786 }
787
788 DWORD media_type_index = 0;
789 ComPtr<IMFMediaType> type;
790
791 while (SUCCEEDED(source->GetAvailableDeviceMediaType(stream_index, media_type_index, &type))) {
792 VideoCaptureFormat format;
793 if (GetFormatFromSourceMediaType(type.Get(), false, &format))
794 capabilities->emplace_back(media_type_index, format, stream_index);
795 type.Reset();
796 ++media_type_index;
797 }
798 if (hr == MF_E_NO_MORE_TYPES) {
799 hr = S_OK;
800 }
801 if (FAILED(hr)) {
802 return hr;
803 }
804 }
805 return hr;
806 }
807
808 HRESULT AllocateAndStart(int width, int height, UINT32 frame_rate, VideoPixelFormat& pixel_format) {
809 ComPtr<IMFCaptureSource> source;
810 HRESULT hr = engine_->GetSource(&source);
811 if (FAILED(hr)) { return hr; }
812
813 CapabilityList video_capabilities;
814 FillCapabilities(source.Get(), &video_capabilities);
815
816 VideoCaptureFormat requested;
817 requested.frame_size.SetWidthAndHeight(width, height);
818 requested.frame_rate = static_cast<float>(frame_rate);
819 requested.pixel_format = pixel_format;
820
821 const CapabilityWin best_match_video_capability =
822 GetBestMatchedCapability(requested, video_capabilities);
823 if (best_match_video_capability.supported_format.frame_size.width() != width ||
824 best_match_video_capability.supported_format.frame_size.height() != height) {
825 return -1;
826 }
827
828 pixel_format = best_match_video_capability.supported_format.pixel_format;
829
830 ComPtr<IMFMediaType> source_video_media_type;
831 hr = source->GetAvailableDeviceMediaType(best_match_video_capability.stream_index,
832 best_match_video_capability.media_type_index, &source_video_media_type);
833 if (FAILED(hr)) { return hr; }
834
835 hr = source->SetCurrentDeviceMediaType(
836 best_match_video_capability.stream_index, source_video_media_type.Get());
837 if (FAILED(hr)) { return hr; }
838
839 ComPtr<IMFCaptureSink> sink;
840 hr = engine_->GetSink(MF_CAPTURE_ENGINE_SINK_TYPE_PREVIEW, &sink);
841 if (FAILED(hr)) { return hr; }
842
843 ComPtr<IMFCapturePreviewSink> preview_sink;
844 hr = sink->QueryInterface(IID_PPV_ARGS(&preview_sink));
845 if (FAILED(hr)) { return hr; }
846
847 hr = preview_sink->RemoveAllStreams();
848 if (FAILED(hr)) { return hr; }
849
850 ComPtr<IMFMediaType> sink_video_media_type;
851 hr = MFCreateMediaType(&sink_video_media_type);
852 if (FAILED(hr)) { return hr; }
853
854 hr = ConvertToVideoSinkMediaType(source_video_media_type.Get(), sink_video_media_type.Get(), frame_rate);
855 if (FAILED(hr)) { return hr; }
856
857 DWORD dw_sink_stream_index = 0;
858 hr = preview_sink->AddStream(best_match_video_capability.stream_index,
859 sink_video_media_type.Get(), nullptr,
860 &dw_sink_stream_index);
861 if (FAILED(hr)) { return hr; }
862
863 hr = preview_sink->SetSampleCallback(dw_sink_stream_index,
864 video_callback_.Get());
865 if (FAILED(hr)) { return hr; }
866
867 // Note, that it is not sufficient to wait for
868 // MF_CAPTURE_ENGINE_PREVIEW_STARTED as an indicator that starting capture has
869 // succeeded. If the capture device is already in use by a different
870 // application, MediaFoundation will still emit
871 // MF_CAPTURE_ENGINE_PREVIEW_STARTED, and only after that raise an error
872 // event. For the lack of any other events indicating success, we have to wait
873 // for the first video frame to arrive before sending our |OnStarted| event to
874 // |client_|.
875 hr = engine_->StartPreview();
876 if (FAILED(hr)) { return hr; }
877
878 return S_OK;
879 }
880
881 HRESULT Stop() {
882 HRESULT hr = engine_->StopPreview();
883 if (FAILED(hr)) {
884 return hr;
885 }
886
887 HANDLE events[] = { camera_stop_, capture_error_ };
888
889 DWORD wait_result = ::WaitForMultipleObjects(2, events, FALSE, INFINITE);
890 if(wait_result == WAIT_OBJECT_0) {
891 return S_OK;
892 }
893 return E_FAIL;
894 }
895
896private:
897 HANDLE camera_stop_;
898 HANDLE capture_error_;
899 HANDLE capture_initialize_;
900 ComPtr<MFVideoCallback<VideoCaptureDeviceMFWin>> video_callback_;
901 ComPtr<IMFMediaSource> source_;
902 ComPtr<IMFCaptureEngine> engine_;
903 std::mutex mutex_;
904 callback_t callback_ = nullptr;
905};
906
907// clang-format on
908
909#endif