// Banuba SDK — camera_device.hpp
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Merged in single header MF based camera from Chromium project.
3 
4 #pragma once
5 
6 #include <bnb/utils/defs.hpp>
7 
8 #if BNB_OS_WINDOWS
9 
10 // clang-format off
11 
#include <algorithm>
#include <atomic>
#include <cassert>
#include <cmath>
#include <cstdlib>
#include <functional>
#include <iterator>
#include <list>
#include <memory>
#include <mutex>
#include <stddef.h>
#include <thread>
#include <utility>

#include <Windows.h>
#include <mfapi.h>
#include <mferror.h>
#include <wincodec.h>
#include <wrl.h>

#pragma comment(lib, "Mfplat.lib")
#pragma comment(lib, "Mf.lib")
#pragma comment(lib, "Mfreadwrite.lib")
#pragma comment(lib, "mfuuid.lib")
#pragma comment(lib, "shlwapi.lib")
33 
34 using Microsoft::WRL::ComPtr;
35 
36 #include "mfcaptureengine.h"
37 
38 
// Releases a COM-style object through its Release() method and nulls the
// caller's pointer so it cannot be released twice. A null *ppT is a no-op.
template <class T>
void SafeRelease(T** ppT)
{
    T* target = *ppT;
    if (target != nullptr)
    {
        *ppT = nullptr;
        target->Release();
    }
}
47 
// Video pixel formats, mirroring the numbering of Chromium's
// media::VideoPixelFormat. The numeric values are a logging/ABI contract
// (Chromium UMA histograms) and must never be reordered or reused.
enum VideoPixelFormat {
  PIXEL_FORMAT_UNKNOWN = 0, // Unknown or unspecified format value.
  PIXEL_FORMAT_I420 = 1,    // 12bpp YUV planar 1x1 Y, 2x2 UV samples, a.k.a. YU12.

  // Note: Chrome does not actually support YVU compositing, so you probably
  // don't actually want to use this. See http://crbug.com/784627.
  PIXEL_FORMAT_YV12 = 2, // 12bpp YVU planar 1x1 Y, 2x2 VU samples.

  PIXEL_FORMAT_I422 = 3,  // 16bpp YUV planar 1x1 Y, 2x1 UV samples.
  PIXEL_FORMAT_I420A = 4, // 20bpp YUVA planar 1x1 Y, 2x2 UV, 1x1 A samples.
  PIXEL_FORMAT_I444 = 5,  // 24bpp YUV planar, no subsampling.
  PIXEL_FORMAT_NV12 = 6,  // 12bpp with Y plane followed by a 2x2 interleaved UV plane.
  PIXEL_FORMAT_NV21 = 7,  // 12bpp with Y plane followed by a 2x2 interleaved VU plane.
  PIXEL_FORMAT_UYVY = 8,  // 16bpp interleaved 2x1 U, 1x1 Y, 2x1 V, 1x1 Y samples.
  PIXEL_FORMAT_YUY2 = 9,  // 16bpp interleaved 1x1 Y, 2x1 U, 1x1 Y, 2x1 V samples.
  PIXEL_FORMAT_ARGB = 10, // 32bpp BGRA (byte-order), 1 plane.
  PIXEL_FORMAT_XRGB = 11, // 24bpp BGRX (byte-order), 1 plane.
  PIXEL_FORMAT_RGB24 = 12, // 24bpp BGR (byte-order), 1 plane.

  /* PIXEL_FORMAT_RGB32 = 13, Deprecated */
  PIXEL_FORMAT_MJPEG = 14, // MJPEG compressed.
  /* PIXEL_FORMAT_MT21 = 15, Deprecated */

  // The P* in the formats below designates the number of bits per pixel
  // component. I.e. P9 is 9-bits per pixel component, P10 is 10-bits per pixel
  // component, etc.
  PIXEL_FORMAT_YUV420P9 = 16,
  PIXEL_FORMAT_YUV420P10 = 17,
  PIXEL_FORMAT_YUV422P9 = 18,
  PIXEL_FORMAT_YUV422P10 = 19,
  PIXEL_FORMAT_YUV444P9 = 20,
  PIXEL_FORMAT_YUV444P10 = 21,
  PIXEL_FORMAT_YUV420P12 = 22,
  PIXEL_FORMAT_YUV422P12 = 23,
  PIXEL_FORMAT_YUV444P12 = 24,

  /* PIXEL_FORMAT_Y8 = 25, Deprecated */
  PIXEL_FORMAT_Y16 = 26, // single 16bpp plane (used by depth cameras).

  PIXEL_FORMAT_ABGR = 27, // 32bpp RGBA (byte-order), 1 plane.
  PIXEL_FORMAT_XBGR = 28, // 24bpp RGBX (byte-order), 1 plane.

  PIXEL_FORMAT_P016LE = 29, // 24bpp NV12, 16 bits per channel

  PIXEL_FORMAT_XR30 = 30, // 32bpp BGRX, 10 bits per channel, 2 bits ignored, 1 plane
  PIXEL_FORMAT_XB30 = 31, // 32bpp RGBX, 10 bits per channel, 2 bits ignored, 1 plane

  PIXEL_FORMAT_BGRA = 32, // 32bpp ARGB (byte-order), 1 plane.

  // Please update UMA histogram enumeration when adding new formats here.
  PIXEL_FORMAT_MAX = PIXEL_FORMAT_BGRA, // Must always be equal to largest entry logged.
};
108 
// Capture pixel formats this pipeline can consume, ordered by precedence of
// use: earlier entries win ties in ComparePixelFormatPreference(). `static`
// gives each including TU its own (identical) copy, which is intentional.
static VideoPixelFormat const kSupportedCapturePixelFormats[] = {
    PIXEL_FORMAT_NV12, PIXEL_FORMAT_I420, PIXEL_FORMAT_YV12,
    PIXEL_FORMAT_NV21, PIXEL_FORMAT_UYVY, PIXEL_FORMAT_YUY2,
    PIXEL_FORMAT_RGB24, PIXEL_FORMAT_ARGB, PIXEL_FORMAT_MJPEG,
};
115 
116 
namespace gfx {
    // Minimal stand-in for Chromium's gfx::Size: a mutable width/height pair
    // (in pixels), value-initialized to 0x0.
    struct Size {
        int width_ = 0;
        int height_ = 0;

        // Overwrites both dimensions in one call.
        void SetWidthAndHeight(int new_width, int new_height) {
            width_ = new_width;
            height_ = new_height;
        }

        int width() const { return width_; }
        int height() const { return height_; }
    };
} // namespace gfx
129 
130 
131 
// One capture configuration: resolution, frame rate (frames per second) and
// pixel layout. Used both for the caller's request and for formats the device
// advertises.
struct VideoCaptureFormat {
    gfx::Size frame_size;   // resolution in pixels; defaults to 0x0
    float frame_rate = 0;   // frames per second; 0 means unspecified
    VideoPixelFormat pixel_format = PIXEL_FORMAT_UNKNOWN;
};
137 
138 HRESULT CreateCaptureEngine(IMFCaptureEngine** engine) {
139  ComPtr<IMFCaptureEngineClassFactory> capture_engine_class_factory;
140  HRESULT hr = CoCreateInstance(CLSID_MFCaptureEngineClassFactory, nullptr,
141  CLSCTX_INPROC_SERVER,
142  IID_PPV_ARGS(&capture_engine_class_factory));
143  if (FAILED(hr))
144  return hr;
145 
146  return capture_engine_class_factory->CreateInstance(CLSID_MFCaptureEngine,
147  IID_PPV_ARGS(engine));
148 }
149 
150 HRESULT CopyAttribute(IMFAttributes* source_attributes,
151  IMFAttributes* destination_attributes,
152  const GUID& key) {
153  PROPVARIANT var;
154  PropVariantInit(&var);
155  HRESULT hr = source_attributes->GetItem(key, &var);
156  if (FAILED(hr))
157  return hr;
158 
159  hr = destination_attributes->SetItem(key, var);
160  PropVariantClear(&var);
161  return hr;
162 }
163 
164 HRESULT CreateVideoDeviceSource(IMFMediaSource** ppSource, size_t index)
165 {
166  UINT32 count = 0;
167 
168  *ppSource = NULL;
169 
170  IMFMediaSource* pSource = NULL;
171  IMFAttributes* pAttributes = NULL;
172  IMFActivate** ppDevices = NULL;
173 
174  // Create an attribute store to specify the enumeration parameters.
175  HRESULT hr = MFCreateAttributes(&pAttributes, 1);
176  if (FAILED(hr))
177  {
178  goto done;
179  }
180 
181  // Source type: video capture devices
182  hr = pAttributes->SetGUID(
183  MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE,
184  MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_GUID
185  );
186  if (FAILED(hr))
187  {
188  goto done;
189  }
190 
191  // Enumerate devices.
192  hr = MFEnumDeviceSources(pAttributes, &ppDevices, &count);
193  if (FAILED(hr))
194  {
195  goto done;
196  }
197 
198  if (count == 0 || index >= count)
199  {
200  hr = E_FAIL;
201  goto done;
202  }
203 
204  // Create the media source object.
205  hr = ppDevices[index]->ActivateObject(IID_PPV_ARGS(&pSource));
206  if (FAILED(hr))
207  {
208  goto done;
209  }
210 
211  *ppSource = pSource;
212  (*ppSource)->AddRef();
213 
214 done:
215  SafeRelease(&pAttributes);
216 
217  for (DWORD i = 0; i < count; i++)
218  {
219  SafeRelease(&ppDevices[i]);
220  }
221  CoTaskMemFree(ppDevices);
222  SafeRelease(&pSource);
223  return hr;
224 }
225 
226 
227 bool GetFrameSizeFromMediaType(IMFMediaType* type, gfx::Size* frame_size) {
228  UINT32 width32, height32;
229  if (FAILED(MFGetAttributeSize(type, MF_MT_FRAME_SIZE, &width32, &height32)))
230  return false;
231  frame_size->SetWidthAndHeight(width32, height32);
232  return true;
233 }
234 
// Maps one MF source subtype to the sink subtype we ask IMFCaptureEngine to
// produce, plus the VideoPixelFormat the rest of the pipeline labels it with.
struct MediaFormatConfiguration {
    GUID mf_source_media_subtype; // what the camera outputs
    GUID mf_sink_media_subtype;   // what we request from the engine's sink
    VideoPixelFormat pixel_format; // pipeline-facing label for the sink format
};
240 
241 bool GetMediaFormatConfigurationFromMFSourceMediaSubtype(
242  const GUID& mf_source_media_subtype,
243  MediaFormatConfiguration* media_format_configuration) {
244  static const MediaFormatConfiguration kMediaFormatConfigurationMap[] = {
245  // IMFCaptureEngine inevitably performs the video frame decoding itself.
246  // This means that the sink must always be set to an uncompressed video
247  // format.
248 
249  // Since chromium uses I420 at the other end of the pipe, MF known video
250  // output formats are always set to I420.
251  {MFVideoFormat_I420, MFVideoFormat_I420, PIXEL_FORMAT_I420},
252  {MFVideoFormat_YUY2, MFVideoFormat_I420, PIXEL_FORMAT_I420},
253  {MFVideoFormat_UYVY, MFVideoFormat_I420, PIXEL_FORMAT_I420},
254  {MFVideoFormat_RGB24, MFVideoFormat_I420, PIXEL_FORMAT_I420},
255  {MFVideoFormat_RGB32, MFVideoFormat_I420, PIXEL_FORMAT_I420},
256  {MFVideoFormat_ARGB32, MFVideoFormat_I420, PIXEL_FORMAT_I420},
257  {MFVideoFormat_MJPG, MFVideoFormat_I420, PIXEL_FORMAT_I420},
258  {MFVideoFormat_NV12, MFVideoFormat_NV12, PIXEL_FORMAT_NV12},
259  {MFVideoFormat_YV12, MFVideoFormat_I420, PIXEL_FORMAT_I420},
260 
261  /*
262  // Depth cameras use 16-bit uncompressed video formats.
263  // We ask IMFCaptureEngine to let the frame pass through, without
264  // transcoding, since transcoding would lead to precision loss.
265  {kMediaSubTypeY16, kMediaSubTypeY16, PIXEL_FORMAT_Y16},
266  {kMediaSubTypeZ16, kMediaSubTypeZ16, PIXEL_FORMAT_Y16},
267  {kMediaSubTypeINVZ, kMediaSubTypeINVZ, PIXEL_FORMAT_Y16},
268  {MFVideoFormat_D16, MFVideoFormat_D16, PIXEL_FORMAT_Y16},
269  */
270 
271  // Photo type
272  {GUID_ContainerFormatJpeg, GUID_ContainerFormatJpeg, PIXEL_FORMAT_MJPEG} };
273 
274  for (const auto& kMediaFormatConfiguration : kMediaFormatConfigurationMap) {
275  if (IsEqualGUID(kMediaFormatConfiguration.mf_source_media_subtype,
276  mf_source_media_subtype)) {
277  *media_format_configuration = kMediaFormatConfiguration;
278  return true;
279  }
280  }
281 
282  return false;
283 }
284 
285 
286 bool GetPixelFormatFromMFSourceMediaSubtype(const GUID& mf_source_media_subtype, VideoPixelFormat* pixel_format) {
287  MediaFormatConfiguration media_format_configuration;
288  if (!GetMediaFormatConfigurationFromMFSourceMediaSubtype(
289  mf_source_media_subtype, &media_format_configuration))
290  return false;
291  *pixel_format = media_format_configuration.pixel_format;
292  return true;
293 }
294 
295 // Calculate sink subtype based on source subtype. |passthrough| is set when
296 // sink and source are the same and means that there should be no transcoding
297 // done by IMFCaptureEngine.
298 HRESULT GetMFSinkMediaSubtype(IMFMediaType* source_media_type,
299  GUID* mf_sink_media_subtype,
300  bool* passthrough) {
301  GUID source_subtype;
302  HRESULT hr = source_media_type->GetGUID(MF_MT_SUBTYPE, &source_subtype);
303  if (FAILED(hr))
304  return hr;
305  MediaFormatConfiguration media_format_configuration;
306  if (!GetMediaFormatConfigurationFromMFSourceMediaSubtype(
307  source_subtype, &media_format_configuration))
308  return E_FAIL;
309  *mf_sink_media_subtype = media_format_configuration.mf_sink_media_subtype;
310  *passthrough =
311  IsEqualGUID(media_format_configuration.mf_sink_media_subtype, source_subtype);
312  return S_OK;
313 }
314 
315 HRESULT ConvertToVideoSinkMediaType(IMFMediaType* source_media_type,
316  IMFMediaType* sink_media_type, UINT32 frame_rate) {
317  HRESULT hr = sink_media_type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
318  if (FAILED(hr))
319  return hr;
320 
321  bool passthrough = false;
322  GUID mf_sink_media_subtype;
323  hr = GetMFSinkMediaSubtype(source_media_type, &mf_sink_media_subtype,
324  &passthrough);
325  if (FAILED(hr))
326  return hr;
327 
328  hr = sink_media_type->SetGUID(MF_MT_SUBTYPE, mf_sink_media_subtype);
329  // Copying attribute values for passthrough mode is redundant, since the
330  // format is kept unchanged, and causes AddStream error MF_E_INVALIDMEDIATYPE.
331  if (FAILED(hr)/* || passthrough*/)
332  return hr;
333 
334  hr = CopyAttribute(source_media_type, sink_media_type, MF_MT_FRAME_SIZE);
335  if (FAILED(hr))
336  return hr;
337 
338  // set reuqested frame rate
339  hr = MFSetAttributeRatio(sink_media_type, MF_MT_FRAME_RATE, frame_rate, 1);
340  if (FAILED(hr))
341  return hr;
342  ///
343 
344  hr = CopyAttribute(source_media_type, sink_media_type,
345  MF_MT_PIXEL_ASPECT_RATIO);
346  if (FAILED(hr))
347  return hr;
348 
349  return CopyAttribute(source_media_type, sink_media_type,
350  MF_MT_INTERLACE_MODE);
351 }
352 
353 
// One advertised capture capability: the (stream, media-type-index) pair that
// selects it on the device, plus the decoded VideoCaptureFormat it provides.
// All members are const — instances are immutable after construction.
struct CapabilityWin {
    CapabilityWin(int media_type_index, const VideoCaptureFormat& format)
        : media_type_index(media_type_index),
          supported_format(format),
          info_header(),
          stream_index(0) {}

    // Used by VideoCaptureDeviceWin (DirectShow path).
    CapabilityWin(int media_type_index,
                  const VideoCaptureFormat& format,
                  const BITMAPINFOHEADER& info_header)
        : media_type_index(media_type_index),
          supported_format(format),
          info_header(info_header),
          stream_index(0) {}

    // Used by VideoCaptureDeviceMFWin (Media Foundation path).
    CapabilityWin(int media_type_index,
                  const VideoCaptureFormat& format,
                  int stream_index)
        : media_type_index(media_type_index),
          supported_format(format),
          info_header(),
          stream_index(stream_index) {}

    const int media_type_index;
    const VideoCaptureFormat supported_format;

    // |info_header| is only valid if DirectShow is used.
    const BITMAPINFOHEADER info_header;

    // |stream_index| is only valid if MediaFoundation is used.
    const int stream_index;
};

typedef std::list<CapabilityWin> CapabilityList;
390 
391 
392 // static
393 bool ComparePixelFormatPreference(
394  const VideoPixelFormat& lhs,
395  const VideoPixelFormat& rhs) {
396  auto* format_lhs = std::find(
397  kSupportedCapturePixelFormats,
398  kSupportedCapturePixelFormats + std::size(kSupportedCapturePixelFormats),
399  lhs);
400  auto* format_rhs = std::find(
401  kSupportedCapturePixelFormats,
402  kSupportedCapturePixelFormats + std::size(kSupportedCapturePixelFormats),
403  rhs);
404  return format_lhs < format_rhs;
405 }
406 
407 
408 // Compares the priority of the capture formats. Returns true if |lhs| is the
409 // preferred capture format in comparison with |rhs|. Returns false otherwise.
410 bool CompareCapability(const VideoCaptureFormat& requested,
411  const VideoCaptureFormat& lhs,
412  const VideoCaptureFormat& rhs) {
413  // When 16-bit format is requested and available, avoid other formats.
414  // If both lhs and rhs are 16-bit, we still need to compare them based on
415  // height, width and frame rate.
416  const bool use_requested =
417  (requested.pixel_format == PIXEL_FORMAT_Y16);
418  if (use_requested && lhs.pixel_format != rhs.pixel_format) {
419  if (lhs.pixel_format == requested.pixel_format)
420  return true;
421  if (rhs.pixel_format == requested.pixel_format)
422  return false;
423  }
424  const int diff_height_lhs =
425  std::abs(lhs.frame_size.height() - requested.frame_size.height());
426  const int diff_height_rhs =
427  std::abs(rhs.frame_size.height() - requested.frame_size.height());
428  if (diff_height_lhs != diff_height_rhs)
429  return diff_height_lhs < diff_height_rhs;
430 
431  const int diff_width_lhs =
432  std::abs(lhs.frame_size.width() - requested.frame_size.width());
433  const int diff_width_rhs =
434  std::abs(rhs.frame_size.width() - requested.frame_size.width());
435  if (diff_width_lhs != diff_width_rhs)
436  return diff_width_lhs < diff_width_rhs;
437 
438  const float diff_fps_lhs = std::fabs(lhs.frame_rate - requested.frame_rate);
439  const float diff_fps_rhs = std::fabs(rhs.frame_rate - requested.frame_rate);
440  if (diff_fps_lhs != diff_fps_rhs)
441  return diff_fps_lhs < diff_fps_rhs;
442 
443  return ComparePixelFormatPreference(lhs.pixel_format,
444  rhs.pixel_format);
445 }
446 
447 const CapabilityWin& GetBestMatchedCapability(
448  const VideoCaptureFormat& requested,
449  const CapabilityList& capabilities) {
450 
451  const CapabilityWin* best_match = &(*capabilities.begin());
452  for (const CapabilityWin& capability : capabilities) {
453  if (CompareCapability(requested, capability.supported_format,
454  best_match->supported_format)) {
455  best_match = &capability;
456  }
457  }
458  return *best_match;
459 }
460 
461 
462 
463 
// Locks the given buffer using the fastest supported method when constructed,
// and automatically unlocks the buffer when destroyed (RAII).
// After construction, data() returns nullptr when every lock attempt failed —
// callers must check it before reading. Non-copyable.
class ScopedBufferLock {
public:
    // Takes shared ownership of |buffer| and locks it, preferring the 2D
    // paths (which expose the row pitch) over the plain IMFMediaBuffer lock.
    explicit ScopedBufferLock(ComPtr<IMFMediaBuffer> buffer)
        : buffer_(std::move(buffer)) {
        // No IMF2DBuffer interface at all: only the plain lock is available.
        if (FAILED(buffer_.As(&buffer_2d_))) {
            LockSlow();
            return;
        }
        // Try lock methods from fastest to slowest: Lock2DSize(), then Lock2D(),
        // then finally LockSlow().
        if (Lock2DSize() || Lock2D()) {
            if (IsContiguous())
                return;
            // 2D lock succeeded but the layout is unusable; undo it before
            // falling back.
            buffer_2d_->Unlock2D();
        }
        // Fall back to LockSlow() if 2D buffer was unsupported or noncontiguous.
        // buffer_2d_ is cleared so the destructor unlocks via the right path.
        buffer_2d_ = nullptr;
        LockSlow();
    }

    // Returns whether |buffer_2d_| is contiguous with positive pitch, i.e., the
    // buffer format that the surrounding code expects.
    // Also fills |length_| via GetContiguousLength() when Lock2D() (which does
    // not report a length) was the method that succeeded.
    bool IsContiguous() {
        BOOL is_contiguous;
        return pitch_ > 0 &&
               SUCCEEDED(buffer_2d_->IsContiguousFormat(&is_contiguous)) &&
               is_contiguous &&
               (length_ || SUCCEEDED(buffer_2d_->GetContiguousLength(&length_)));
    }

    // Fastest path: IMF2DBuffer2::Lock2DSize reports data, pitch and length in
    // one call. Returns false when the buffer lacks IMF2DBuffer2.
    bool Lock2DSize() {
        ComPtr<IMF2DBuffer2> buffer_2d_2;
        if (FAILED(buffer_.As(&buffer_2d_2)))
            return false;
        BYTE* data_start;
        return SUCCEEDED(buffer_2d_2->Lock2DSize(MF2DBuffer_LockFlags_Read, &data_,
                                                 &pitch_, &data_start, &length_));
    }

    // Middle path: reports data and pitch only; length is recovered later in
    // IsContiguous().
    bool Lock2D() { return SUCCEEDED(buffer_2d_->Lock2D(&data_, &pitch_)); }

    // Slowest path: plain IMFMediaBuffer::Lock. On failure data_ stays null
    // (checked by callers via data()).
    void LockSlow() {
        DWORD max_length = 0;
        buffer_->Lock(&data_, &max_length, &length_);
    }

    // Unlocks through whichever interface performed the lock: a non-null
    // |buffer_2d_| means a 2D lock is held, otherwise the plain lock.
    ~ScopedBufferLock() {
        if (buffer_2d_)
            buffer_2d_->Unlock2D();
        else
            buffer_->Unlock();
    }

    ScopedBufferLock(const ScopedBufferLock&) = delete;
    ScopedBufferLock& operator=(const ScopedBufferLock&) = delete;

    BYTE* data() const { return data_; }   // nullptr if every lock failed
    DWORD length() const { return length_; } // total contiguous byte length
    LONG pitch() const { return pitch_; }   // row stride; 0 for LockSlow()

private:
    ComPtr<IMFMediaBuffer> buffer_;
    ComPtr<IMF2DBuffer> buffer_2d_; // non-null iff a 2D lock is held
    BYTE* data_ = nullptr;
    DWORD length_ = 0;
    LONG pitch_ = 0;
};
533 
534 template <typename T>
535 class MFVideoCallback final
536  :
537  public IMFCaptureEngineOnSampleCallback,
538  public IMFCaptureEngineOnEventCallback {
539 public:
540  MFVideoCallback(T* observer) : observer_(observer) {}
541 
542  IFACEMETHODIMP QueryInterface(REFIID riid, void** object) override {
543  HRESULT hr = E_NOINTERFACE;
544  if (IsEqualGUID(riid, IID_IUnknown)) {
545  *object = this;
546  hr = S_OK;
547  }
548  else if (IsEqualGUID(riid, IID_IMFCaptureEngineOnSampleCallback)) {
549  *object = static_cast<IMFCaptureEngineOnSampleCallback*>(this);
550  hr = S_OK;
551  }
552  else if (IsEqualGUID(riid, IID_IMFCaptureEngineOnEventCallback)) {
553  *object = static_cast<IMFCaptureEngineOnEventCallback*>(this);
554  hr = S_OK;
555  }
556  if (SUCCEEDED(hr))
557  AddRef();
558 
559  return hr;
560  }
561 
562  IFACEMETHODIMP_(ULONG) AddRef() override {
563  return InterlockedIncrement(&m_cRef);
564  }
565 
566  IFACEMETHODIMP_(ULONG) Release() override {
567  ULONG l = InterlockedDecrement(&m_cRef);
568  if (0 == l) delete this;
569  return l;
570  }
571 
572  IFACEMETHODIMP OnEvent(IMFMediaEvent* media_event) override {
573  if (!observer_) {
574  return S_OK;
575  }
576 
577  GUID capture_event_guid = GUID_NULL;
578  {
579  std::lock_guard<std::mutex> guard(lock_);
580  observer_->OnEvent(media_event);
581  if (HRESULT hr = media_event->GetExtendedType(&capture_event_guid); FAILED(hr)) {
582  return hr;
583  }
584  }
585  return S_OK;
586  }
587 
588  IFACEMETHODIMP OnSample(IMFSample* sample) override {
589  std::lock_guard<std::mutex> guard(lock_);
590 
591  if (!observer_) {
592  return S_OK;
593  }
594  if (!sample) {
595  observer_->OnFrameDropped();
596  return S_OK;
597  }
598  /*
599  base::TimeTicks reference_time(base::TimeTicks::Now());
600  LONGLONG raw_time_stamp = 0;
601  sample->GetSampleTime(&raw_time_stamp);
602  base::TimeDelta timestamp =
603  base::TimeDelta::FromMicroseconds(raw_time_stamp / 10);
604  */
605 
606  DWORD count = 0;
607  sample->GetBufferCount(&count);
608 
609  for (DWORD i = 0; i < count; ++i) {
610  ComPtr<IMFMediaBuffer> buffer;
611  sample->GetBufferByIndex(i, &buffer);
612  if (buffer) {
613  auto locked_buffer = std::make_shared<ScopedBufferLock>(buffer);
614  if (locked_buffer->data()) {
615  observer_->OnIncomingCapturedData(std::move(locked_buffer));
616  }
617  else {
618  observer_->OnFrameDropped();
619  }
620  }
621  else {
622  observer_->OnFrameDropped();
623  }
624  }
625  return S_OK;
626  }
627 
628  void Shutdown() {
629  std::lock_guard<std::mutex> guard(lock_);
630  observer_ = nullptr;
631  }
632 
633 private:
634  ULONG m_cRef = 0;
635  ~MFVideoCallback() {}
636  std::mutex lock_;
637  T* observer_;
638 };
639 
640 class VideoCaptureDeviceMFWin {
641 public:
642  using callback_t = std::function<void(std::shared_ptr<ScopedBufferLock> lock)>;
643  VideoCaptureDeviceMFWin(ComPtr<IMFMediaSource>& source)
644  :source_(source) {
645  camera_stop_ = ::CreateEvent(NULL, TRUE, FALSE, "CameraStop");
646  capture_error_ = ::CreateEvent(NULL, TRUE, FALSE, "CaptureError");
647  capture_initialize_ = ::CreateEvent(NULL, TRUE, FALSE, "CaptureInitialize");
648  }
649 
650  void SetCallback(callback_t callback) {
651  std::lock_guard<std::mutex> lock(mutex_);
652  callback_ = callback;
653  }
654 
655  ~VideoCaptureDeviceMFWin() {
656  ::CloseHandle(camera_stop_);
657  ::CloseHandle(capture_error_);
658  ::CloseHandle(capture_initialize_);
659  }
660 
661  void OnEvent(IMFMediaEvent* media_event) {
662  HRESULT hr;
663  GUID capture_event_guid = GUID_NULL;
664 
665  media_event->GetStatus(&hr);
666  media_event->GetExtendedType(&capture_event_guid);
667  if (IsEqualGUID(capture_event_guid, MF_CAPTURE_ENGINE_ERROR) || FAILED(hr)) {
668  ::SetEvent(capture_error_);
669  }
670  else if (IsEqualGUID(capture_event_guid, MF_CAPTURE_ENGINE_INITIALIZED)) {
671  ::SetEvent(capture_initialize_);
672  }
673  else if (IsEqualGUID(capture_event_guid, MF_CAPTURE_ENGINE_PREVIEW_STOPPED)) {
674  ::SetEvent(camera_stop_);
675  }
676  }
677 
678  void OnFrameDropped() {
679 
680  }
681 
682 
683  void OnIncomingCapturedData(std::shared_ptr<ScopedBufferLock> buffer_lock) {
684  std::lock_guard<std::mutex> lock(mutex_);
685  if (callback_) {
686  callback_(std::move(buffer_lock));
687  }
688  }
689 
690  HRESULT Init() {
691  HRESULT hr = S_OK;
692 
693  if (!engine_) {
694  hr = CreateCaptureEngine(&engine_);
695  if (FAILED(hr)) {
696  assert(0);
697  return hr;
698  }
699  }
700 
701  ComPtr<IMFAttributes> attributes;
702  hr = MFCreateAttributes(&attributes, 1);
703  if (FAILED(hr)) {
704  assert(0);
705  return hr;
706  }
707 
708  hr = attributes->SetUINT32(MF_CAPTURE_ENGINE_USE_VIDEO_DEVICE_ONLY, TRUE);
709  if (FAILED(hr)) {
710  assert(0);
711  return hr;
712  }
713 
714 
715  video_callback_ = new MFVideoCallback<VideoCaptureDeviceMFWin>(this);
716 
717 
718  hr = engine_->Initialize(video_callback_.Get(), attributes.Get(), nullptr, source_.Get());
719  if (FAILED(hr)) {
720  assert(0);
721  return hr;
722  }
723 
724  HANDLE events[] = { capture_initialize_, capture_error_ };
725 
726  DWORD wait_result = ::WaitForMultipleObjects(2, events, FALSE, INFINITE);
727  switch (wait_result) {
728  case WAIT_OBJECT_0:
729  break;
730  case WAIT_FAILED:
731  hr = HRESULT_FROM_WIN32(::GetLastError());
732  assert(0);
733  break;
734  default:
735  hr = E_UNEXPECTED;
736  assert(0);
737  break;
738  }
739  return hr;
740  }
741 
742  bool GetFrameRateFromMediaType(IMFMediaType* type, float* frame_rate) {
743  UINT32 numerator, denominator;
744  if (FAILED(MFGetAttributeRatio(type, MF_MT_FRAME_RATE, &numerator,
745  &denominator)) ||
746  !denominator) {
747  return false;
748  }
749  *frame_rate = static_cast<float>(numerator) / denominator;
750  return true;
751  }
752 
753  bool GetFormatFromSourceMediaType(IMFMediaType* source_media_type, bool photo, VideoCaptureFormat* format) {
754  GUID major_type_guid;
755  if (FAILED(source_media_type->GetGUID(MF_MT_MAJOR_TYPE, &major_type_guid)) ||
756  (!IsEqualGUID(major_type_guid, MFMediaType_Image) &&
757  (photo ||
758  !GetFrameRateFromMediaType(source_media_type, &format->frame_rate)))) {
759  return false;
760  }
761 
762  GUID sub_type_guid;
763  if (FAILED(source_media_type->GetGUID(MF_MT_SUBTYPE, &sub_type_guid)) ||
764  !GetFrameSizeFromMediaType(source_media_type, &format->frame_size) ||
765  !GetPixelFormatFromMFSourceMediaSubtype(
766  sub_type_guid, &format->pixel_format)) {
767  return false;
768  }
769 
770  return true;
771  }
772 
773 
774  HRESULT FillCapabilities(IMFCaptureSource* source, CapabilityList* capabilities) {
775  DWORD stream_count = 0;
776  HRESULT hr = source->GetDeviceStreamCount(&stream_count);
777  if (FAILED(hr)) { assert(0); }
778 
779  for (DWORD stream_index = 0; stream_index < stream_count; stream_index++) {
780  MF_CAPTURE_ENGINE_STREAM_CATEGORY stream_category;
781  hr = source->GetDeviceStreamCategory(stream_index, &stream_category);
782  if (FAILED(hr)) { assert(0); }
783  if (stream_category != MF_CAPTURE_ENGINE_STREAM_CATEGORY_VIDEO_PREVIEW &&
784  stream_category != MF_CAPTURE_ENGINE_STREAM_CATEGORY_VIDEO_CAPTURE) {
785  continue;
786  }
787 
788  DWORD media_type_index = 0;
789  ComPtr<IMFMediaType> type;
790 
791  while (SUCCEEDED(source->GetAvailableDeviceMediaType(stream_index, media_type_index, &type))) {
792  VideoCaptureFormat format;
793  if (GetFormatFromSourceMediaType(type.Get(), false, &format))
794  capabilities->emplace_back(media_type_index, format, stream_index);
795  type.Reset();
796  ++media_type_index;
797  }
798  if (hr == MF_E_NO_MORE_TYPES) {
799  hr = S_OK;
800  }
801  if (FAILED(hr)) {
802  return hr;
803  }
804  }
805  return hr;
806  }
807 
808  HRESULT AllocateAndStart(int width, int height, UINT32 frame_rate, VideoPixelFormat& pixel_format) {
809  ComPtr<IMFCaptureSource> source;
810  HRESULT hr = engine_->GetSource(&source);
811  if (FAILED(hr)) { return hr; }
812 
813  CapabilityList video_capabilities;
814  FillCapabilities(source.Get(), &video_capabilities);
815 
816  VideoCaptureFormat requested;
817  requested.frame_size.SetWidthAndHeight(width, height);
818  requested.frame_rate = static_cast<float>(frame_rate);
819  requested.pixel_format = pixel_format;
820 
821  const CapabilityWin best_match_video_capability =
822  GetBestMatchedCapability(requested, video_capabilities);
823  if (best_match_video_capability.supported_format.frame_size.width() != width ||
824  best_match_video_capability.supported_format.frame_size.height() != height) {
825  return -1;
826  }
827 
828  pixel_format = best_match_video_capability.supported_format.pixel_format;
829 
830  ComPtr<IMFMediaType> source_video_media_type;
831  hr = source->GetAvailableDeviceMediaType(best_match_video_capability.stream_index,
832  best_match_video_capability.media_type_index, &source_video_media_type);
833  if (FAILED(hr)) { return hr; }
834 
835  hr = source->SetCurrentDeviceMediaType(
836  best_match_video_capability.stream_index, source_video_media_type.Get());
837  if (FAILED(hr)) { return hr; }
838 
839  ComPtr<IMFCaptureSink> sink;
840  hr = engine_->GetSink(MF_CAPTURE_ENGINE_SINK_TYPE_PREVIEW, &sink);
841  if (FAILED(hr)) { return hr; }
842 
843  ComPtr<IMFCapturePreviewSink> preview_sink;
844  hr = sink->QueryInterface(IID_PPV_ARGS(&preview_sink));
845  if (FAILED(hr)) { return hr; }
846 
847  hr = preview_sink->RemoveAllStreams();
848  if (FAILED(hr)) { return hr; }
849 
850  ComPtr<IMFMediaType> sink_video_media_type;
851  hr = MFCreateMediaType(&sink_video_media_type);
852  if (FAILED(hr)) { return hr; }
853 
854  hr = ConvertToVideoSinkMediaType(source_video_media_type.Get(), sink_video_media_type.Get(), frame_rate);
855  if (FAILED(hr)) { return hr; }
856 
857  DWORD dw_sink_stream_index = 0;
858  hr = preview_sink->AddStream(best_match_video_capability.stream_index,
859  sink_video_media_type.Get(), nullptr,
860  &dw_sink_stream_index);
861  if (FAILED(hr)) { return hr; }
862 
863  hr = preview_sink->SetSampleCallback(dw_sink_stream_index,
864  video_callback_.Get());
865  if (FAILED(hr)) { return hr; }
866 
867  // Note, that it is not sufficient to wait for
868  // MF_CAPTURE_ENGINE_PREVIEW_STARTED as an indicator that starting capture has
869  // succeeded. If the capture device is already in use by a different
870  // application, MediaFoundation will still emit
871  // MF_CAPTURE_ENGINE_PREVIEW_STARTED, and only after that raise an error
872  // event. For the lack of any other events indicating success, we have to wait
873  // for the first video frame to arrive before sending our |OnStarted| event to
874  // |client_|.
875  hr = engine_->StartPreview();
876  if (FAILED(hr)) { return hr; }
877 
878  return S_OK;
879  }
880 
881  HRESULT Stop() {
882  HRESULT hr = engine_->StopPreview();
883  if (FAILED(hr)) {
884  return hr;
885  }
886 
887  HANDLE events[] = { camera_stop_, capture_error_ };
888 
889  DWORD wait_result = ::WaitForMultipleObjects(2, events, FALSE, INFINITE);
890  if(wait_result == WAIT_OBJECT_0) {
891  return S_OK;
892  }
893  return E_FAIL;
894  }
895 
896 private:
897  HANDLE camera_stop_;
898  HANDLE capture_error_;
899  HANDLE capture_initialize_;
900  ComPtr<MFVideoCallback<VideoCaptureDeviceMFWin>> video_callback_;
901  ComPtr<IMFMediaSource> source_;
902  ComPtr<IMFCaptureEngine> engine_;
903  std::mutex mutex_;
904  callback_t callback_ = nullptr;
905 };
906 
907 // clang-format on
908 
909 #endif