// [Blend2D]
// 2D Vector Graphics Powered by a JIT Compiler.
//
// [License]
// Zlib - See LICENSE.md file in the package.

#ifndef BLEND2D_PIPEGEN_COMPOPPART_P_H
#define BLEND2D_PIPEGEN_COMPOPPART_P_H

#include "../pipegen/fetchpart_p.h"
#include "../pipegen/pipepart_p.h"

//! \cond INTERNAL
//! \addtogroup blend2d_internal_pipegen
//! \{

namespace BLPipeGen {

// ============================================================================
// [BLPipeGen::CompOpPart]
// ============================================================================

//! Pipeline composition part that combines source pixels with destination
//! pixels according to the selected composition operator.
class CompOpPart : public PipePart {
public:
  BL_NONCOPYABLE(CompOpPart)

  enum : uint32_t {
    kIndexDstPart = 0,
    kIndexSrcPart = 1
  };

  //! Composition operator.
  uint32_t _compOp;
  //! Pixel type of the composition, see `Pixel::Type`.
  uint8_t _pixelType;
  //! The current span mode.
  uint8_t _cMaskLoopType;
  //! Maximum pixels the compositor can handle at a time.
  uint8_t _maxPixels;
  //! Pixel granularity.
  uint8_t _pixelGranularity;
  //! Minimum alignment required to process `_maxPixels`.
  uint8_t _minAlignment;

  //! Whether the part is currently in partial fetch mode, see `enterPartialMode()`.
  uint8_t _isInPartialMode : 1;
  //! Whether the destination format has an alpha component.
  uint8_t _hasDa : 1;
  //! Whether the source format has an alpha component.
  uint8_t _hasSa : 1;

  //! A hook that is used by the current loop.
  asmjit::BaseNode* _cMaskLoopHook;
  //! Optimized solid pixel for operators that allow it.
  SolidPixel _solidOpt;
  //! Pre-processed solid pixel for TypeA operators that always use `vMaskProc?()`.
  Pixel _solidPre;
  //! Partial fetch that happened at the end of the scanline (border case).
  Pixel _partialPixel;
  //! Const mask.
  BLWrap<PipeCMask> _mask;

  CompOpPart(PipeCompiler* pc, uint32_t compOp, FetchPart* dstPart, FetchPart* srcPart) noexcept;

  BL_INLINE FetchPart* dstPart() const noexcept { return reinterpret_cast<FetchPart*>(_children[kIndexDstPart]); }
  BL_INLINE FetchPart* srcPart() const noexcept { return reinterpret_cast<FetchPart*>(_children[kIndexSrcPart]); }

  //! Returns the composition operator id, see `BLCompOp`.
  BL_INLINE uint32_t compOp() const noexcept { return _compOp; }
  //! Returns the composition operator flags, see `BLCompOpFlags`.
  BL_INLINE uint32_t compOpFlags() const noexcept { return blCompOpInfo[_compOp].flags; }

  //! Tests whether the destination pixel format has an alpha component.
  BL_INLINE bool hasDa() const noexcept { return _hasDa != 0; }
  //! Tests whether the source pixel format has an alpha component.
  BL_INLINE bool hasSa() const noexcept { return _hasSa != 0; }

  BL_INLINE uint32_t pixelType() const noexcept { return _pixelType; }
  BL_INLINE bool isAlphaType() const noexcept { return _pixelType == Pixel::kTypeAlpha; }
  BL_INLINE bool isRGBAType() const noexcept { return _pixelType == Pixel::kTypeRGBA; }

  //! Returns the current loop mode.
  BL_INLINE uint32_t cMaskLoopType() const noexcept { return _cMaskLoopType; }
  //! Tests whether the current loop is fully opaque (no mask).
  BL_INLINE bool isLoopOpaque() const noexcept { return _cMaskLoopType == kCMaskLoopTypeOpaque; }
  //! Tests whether the current loop is `CMask` (constant mask).
  BL_INLINE bool isLoopCMask() const noexcept { return _cMaskLoopType == kCMaskLoopTypeMask; }

  //! Returns the maximum pixels the composite part can handle at a time.
  //!
  //! \note This value is configured in a way that it's always one if the fetch
  //! part doesn't support more. This makes it easy to use it in loop compilers.
  //! In other words, the value doesn't describe the real implementation of the
  //! composite part.
  BL_INLINE uint32_t maxPixels() const noexcept { return _maxPixels; }
  //! Returns the maximum pixels the children of this part can handle.
  BL_INLINE uint32_t maxPixelsOfChildren() const noexcept { return blMin(dstPart()->maxPixels(), srcPart()->maxPixels()); }
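
  // Illustrative only - a rough sketch of how a loop compiler could rely on
  // `maxPixels()`. Because the value collapses to 1 whenever a fetch part
  // cannot handle more pixels, a single branch is enough to select the loop
  // body; `emitVectorBody()` and `emitScalarBody()` are hypothetical helpers,
  // not part of Blend2D:
  //
  //   if (compOpPart->maxPixels() >= 4)
  //     emitVectorBody(4); // Process 4 pixels per iteration.
  //   else
  //     emitScalarBody();  // Always valid, `maxPixels()` is at least 1.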

  //! Returns the pixel granularity passed to `init()`; the result is zero if
  //! the part hasn't been initialized yet.
  BL_INLINE uint32_t pixelGranularity() const noexcept { return _pixelGranularity; }
  //! Returns the minimum destination alignment required to process the maximum
  //! number of pixels (`_maxPixels`).
  BL_INLINE uint32_t minAlignment() const noexcept { return _minAlignment; }

  BL_INLINE bool isUsingSolidPre() const noexcept { return !_solidPre.pc.empty() || !_solidPre.uc.empty(); }

  void init(x86::Gp& x, x86::Gp& y, uint32_t pixelGranularity) noexcept;
  void fini() noexcept;

  //! Tests whether the opaque fill should be optimized and placed into a separate
  //! loop.
  bool shouldOptimizeOpaqueFill() const noexcept;

  //! Tests whether the compositor should emit a specialized loop that contains
  //! an inlined version of `memcpy()` or `memset()`.
  bool shouldJustCopyOpaqueFill() const noexcept;
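
  // Illustrative only - a sketch of how a hypothetical caller could act on
  // `shouldJustCopyOpaqueFill()` using the loops declared later in this class
  // (`i` would be a pixel counter register):
  //
  //   if (compOpPart->shouldJustCopyOpaqueFill())
  //     compOpPart->cMaskMemcpyOrMemsetLoop(i); // Inlined memcpy()/memset().
  //   else
  //     compOpPart->cMaskGenericLoop(i);        // Generic composition loop.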

  void startAtX(x86::Gp& x) noexcept;
  void advanceX(x86::Gp& x, x86::Gp& diff) noexcept;
  void advanceY() noexcept;

  // The following methods are just wrappers that forward the call to both the
  // destination and source parts (see the sketch after these declarations).
  void prefetch1() noexcept;
  void enterN() noexcept;
  void leaveN() noexcept;
  void prefetchN() noexcept;
  void postfetchN() noexcept;
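
  // For example, such a forwarding wrapper is assumed to look roughly like the
  // sketch below (the real definitions live in the implementation file):
  //
  //   void CompOpPart::prefetchN() noexcept {
  //     dstPart()->prefetchN();
  //     srcPart()->prefetchN();
  //   }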

  void dstFetch(Pixel& p, uint32_t flags, uint32_t n) noexcept;
  void srcFetch(Pixel& p, uint32_t flags, uint32_t n) noexcept;

  BL_INLINE bool isInPartialMode() const noexcept { return _isInPartialMode != 0; }

  void enterPartialMode(uint32_t partialFlags = 0) noexcept;
  void exitPartialMode() noexcept;
  void nextPartialPixel() noexcept;
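
  // Illustrative only - an assumed usage pattern of the partial mode, which
  // handles the border case where pixels fetched in bulk at the end of a
  // scanline have to be consumed one at a time (`remaining` is hypothetical):
  //
  //   compOpPart->enterPartialMode();
  //   do {
  //     // ... composite a single pixel from the partially fetched data ...
  //     compOpPart->nextPartialPixel();
  //   } while (--remaining);
  //   compOpPart->exitPartialMode();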

  void cMaskInit(const x86::Mem& mem) noexcept;
  void cMaskInit(const x86::Gp& sm_, const x86::Vec& vm_) noexcept;
  void cMaskInitOpaque() noexcept;
  void cMaskFini() noexcept;

  void _cMaskLoopInit(uint32_t loopType) noexcept;
  void _cMaskLoopFini() noexcept;

  void cMaskGenericLoop(x86::Gp& i) noexcept;
  void cMaskGenericLoopXmm(x86::Gp& i) noexcept;
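
  // Illustrative only - an assumed lifecycle of a constant-mask (CMask) span
  // as driven by a hypothetical filler, where `mem` holds the constant mask
  // and `i` is the pixel counter register:
  //
  //   compOpPart->cMaskInit(mem);      // Or cMaskInitOpaque() when there is no mask.
  //   compOpPart->cMaskGenericLoop(i); // Composite the whole span.
  //   compOpPart->cMaskFini();         // Tear down the constant-mask state.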

  void cMaskGranularLoop(x86::Gp& i) noexcept;
  void cMaskGranularLoopXmm(x86::Gp& i) noexcept;

  void cMaskMemcpyOrMemsetLoop(x86::Gp& i) noexcept;
  void cMaskCompositeAndStore(const x86::Mem& dPtr, uint32_t n, uint32_t alignment = 1) noexcept;
  void vMaskProc(Pixel& out, uint32_t flags, x86::Gp& msk, bool mImmutable) noexcept;

  void cMaskInitA8(const x86::Gp& sm_, const x86::Vec& vm_) noexcept;
  void cMaskFiniA8() noexcept;

  void cMaskProcA8Gp(Pixel& out, uint32_t flags) noexcept;
  void vMaskProcA8Gp(Pixel& out, uint32_t flags, x86::Gp& msk, bool mImmutable) noexcept;

  void cMaskProcA8Xmm(Pixel& out, uint32_t n, uint32_t flags) noexcept;
  void vMaskProcA8Xmm(Pixel& out, uint32_t n, uint32_t flags, VecArray& vm, bool mImmutable) noexcept;

  void cMaskInitRGBA32(const x86::Vec& vm) noexcept;
  void cMaskFiniRGBA32() noexcept;

  void cMaskProcRGBA32Xmm(Pixel& out, uint32_t n, uint32_t flags) noexcept;
  void vMaskProcRGBA32Xmm(Pixel& out, uint32_t n, uint32_t flags, VecArray& vm, bool mImmutable) noexcept;
  void vMaskProcRGBA32InvertMask(VecArray& vn, VecArray& vm) noexcept;
  void vMaskProcRGBA32InvertDone(VecArray& vn, bool mImmutable) noexcept;
};

} // {BLPipeGen}

//! \}
//! \endcond

#endif // BLEND2D_PIPEGEN_COMPOPPART_P_H