Deep Learning Lecture 13: Introduction to Computer Vision
Foundations: Image Representation, Pixels, Channels, and Basic Processing
20 Comprehensive Slides
Welcome to the World of Machine Vision
Teaching Computers to See Like Humans
The Extraordinary Human Ability:
In a split second, you can:
  • Recognize your friend in a crowded room
  • Distinguish between a cat and a dog
  • Read handwritten text
  • Navigate through traffic
  • Understand facial expressions
The Computer Challenge:
To a computer, an image is just a grid of numbers. Today's journey explores how we bridge the gap between human intuition and machine understanding.
Our Learning Adventure:
  • How images become numbers computers can process
  • The building blocks of digital vision
  • Basic image operations that unlock understanding
  • The foundation for modern AI vision systems
The Big Question: "How do we teach a machine to see the world the way we do?"
Real-World Impact: Computer vision powers self-driving cars, medical diagnosis, face recognition, quality control, and augmented reality!
The Human Vision System - Our Inspiration
Understanding What We're Trying to Replicate
The Biological Marvel: Your visual system processes information through multiple stages:
Stage 1: Light Detection
  • Eyes capture light reflected from objects
  • Retina converts light into electrical signals
  • Like a biological camera sensor
Stage 2: Feature Extraction
  • Brain identifies edges, shapes, textures
  • Recognizes patterns and structures
  • Builds hierarchical understanding
Stage 3: Object Recognition
  • Combines features into meaningful objects
  • Applies context and experience
  • Makes decisions and predictions
The Challenge for Computers:
Human: Light → Eyes → Brain → Understanding (instant)
Computer: Pixels → Algorithms → Processing → Recognition (we need to build this!)

Key Insight: Human vision is incredibly sophisticated - we need to break it down into steps computers can follow!
What is an Image to a Computer?
From Visual Experience to Numbers
Human Perspective:
"I see a beautiful sunset with orange and purple clouds"
Computer Perspective:
A 3D array of numbers:
  • Height: 1080 pixels
  • Width: 1920 pixels
  • Channels: 3 (Red, Green, Blue)
  • Total: 6,220,800 individual numbers!
The Fundamental Transformation:
Visual Scene → Light Rays → Camera Sensor → Digital Numbers → Image Array
Simple Example - A 3×3 Grayscale Image:
Visual: A small dark square with a single bright pixel at its center
Computer sees:
[10,  15, 12]
[18, 200, 25]
[14,  20, 16]
Where each number represents brightness (0=black, 255=white)
The Magic: Every digital image is just a carefully organized collection of numbers that, when displayed correctly, recreate the visual experience!
Pixels - The Building Blocks of Digital Vision
The Smallest Units of Visual Information
What is a Pixel?
  • Short for: "Picture Element"
  • Think of it as: The smallest dot that makes up a digital image
  • Real-world analogy: Like tiles in a mosaic artwork
Pixel Characteristics:
  • Position: (x, y) coordinates in the image
  • Intensity: Brightness value (usually 0-255)
  • Color: Combination of Red, Green, Blue values
Resolution Impact:
  • Low Resolution (10×10 pixels): very blocky, hard to recognize
  • Medium Resolution (100×100): basic shapes visible
  • High Resolution (1000×1000): sharp, detailed image
Real Example:
  • Smartphone: 12 megapixels (4000×3000)
  • 4K TV: 8.3 megapixels (3840×2160)
  • Human eye equivalent: ~576 megapixels!
Mathematical Representation:
Grayscale Image: I(x,y) = intensity value
Color Image: I(x,y) = [R(x,y), G(x,y), B(x,y)]

Key Insight: More pixels = more detail, but also more computation required!
Grayscale Images - Simplicity in Black and White
Understanding Single-Channel Images
What is Grayscale?
An image where each pixel represents only intensity (brightness), not color.
Mathematical Representation:
  • Grayscale Image = 2D Array
  • Shape: (Height, Width)
  • Value Range: 0 (black) to 255 (white)
Example 5×5 image:
[  0,  50, 100, 150, 255]
[ 25,  75, 125, 175, 200]
[ 50, 100, 150, 175, 150]
[ 75, 125, 175, 125, 100]
[100, 150, 200, 100,  50]
Why Start with Grayscale?
  • Simpler to understand: only one value per pixel
  • Faster processing: one-third the data of a color image
  • Many applications work fine without color
  • Mathematical operations are more straightforward
Converting Color to Grayscale:
Standard Formula: Gray = 0.299×Red + 0.587×Green + 0.114×Blue
Why these weights?
  • Human eyes are most sensitive to green
  • Least sensitive to blue
  • This formula mimics human perception
Real Applications: Medical X-rays, document scanning, edge detection, many computer vision algorithms start with grayscale!
Color Images and the RGB Color Model
Understanding Multi-Channel Representation
The RGB Trinity:
Every color you see on a screen is made from mixing three primary colors:
  • Red channel
  • Green channel
  • Blue channel
Mathematical Structure:
  • Color Image = 3D Array
  • Shape: (Height, Width, 3)
  • Each pixel: [R_value, G_value, B_value]
  • Range: 0-255 for each channel
Example pixel values:
  • Pure Red: [255, 0, 0]
  • Pure Green: [0, 255, 0]
  • Pure Blue: [0, 0, 255]
  • White: [255, 255, 255]
  • Black: [0, 0, 0]
  • Yellow: [255, 255, 0] (Red + Green)
  • Purple: [128, 0, 128] (Red + Blue)
Memory Requirements:
Grayscale Image (1920×1080): 2.1 MB
Color Image (1920×1080): 6.2 MB (3× larger!)
Channel Visualization: each channel, displayed on its own, looks like a grayscale image of that color's intensity.
The Magic: By combining three grayscale images (R, G, B), we recreate the full spectrum of human-visible colors!
Image Coordinate Systems - Navigating the Pixel Grid
Understanding How Computers Address Pixels
The Coordinate System:
Computer Graphics Convention:
  • Origin (0,0) is at TOP-LEFT corner
  • X-axis goes RIGHT (columns)
  • Y-axis goes DOWN (rows)
Mathematical Convention:
  • Origin (0,0) is at BOTTOM-LEFT corner
  • X-axis goes RIGHT
  • Y-axis goes UP
Practical Example - 4×3 Image:
Computer Vision Coordinates:
(0,0) (1,0) (2,0) (3,0)
(0,1) (1,1) (2,1) (3,1)
(0,2) (1,2) (2,2) (3,2)
Array Indexing:
  • image[row, column] = image[y, x]
  • image[0, 0] = top-left pixel
  • image[2, 3] = bottom-right pixel
Why This Matters:
  • Image processing algorithms depend on correct indexing
  • Transformations (rotation, scaling) need proper coordinates
  • Region selection requires understanding pixel addressing

Common Confusion:
Beginner mistake: image[x, y]
Correct way: image[y, x]
Remember: "Row first, Column second"
Image Data Types and Storage
The Numbers Behind the Pixels
Common Data Types:
1. 8-bit Unsigned Integer (uint8)
  • Range: 0 to 255
  • Memory: 1 byte per pixel
  • Usage: Most common for display and basic processing
  • Example: Typical JPEG images
2. 16-bit Unsigned Integer (uint16)
  • Range: 0 to 65,535
  • Memory: 2 bytes per pixel
  • Usage: Medical imaging, scientific applications
  • Example: X-ray images, astronomical photos
3. 32-bit Float (float32)
  • Range: 0.0 to 1.0 (normalized) or any real number
  • Memory: 4 bytes per pixel
  • Usage: Mathematical processing, machine learning
  • Example: Deep learning networks
Storage Format Comparison:
Type      Range            Memory/pixel   Typical Use
uint8     0 to 255         1 byte         Display, JPEG photos
uint16    0 to 65,535      2 bytes        Medical/scientific imaging
float32   0.0 to 1.0       4 bytes        Machine learning, math processing
Conversion Between Types:
# Convert uint8 (0-255) to float32 (0.0-1.0)
float_image = uint8_image.astype(np.float32) / 255.0
# Convert float32 back to uint8
uint8_image = (float_image * 255).astype(np.uint8)
Basic Image Operations - Mathematical Foundations
The Building Blocks of Image Processing
Pixel-wise Operations: These operations work on individual pixels independently.
1. Brightness Adjustment
Mathematical Formula: Output(x,y) = Input(x,y) + brightness_value
Example:
  • Original pixel: 100
  • Add brightness +50: 100 + 50 = 150 (brighter)
  • Add brightness -30: 100 - 30 = 70 (darker)
Important: Clip values to valid range [0, 255]
2. Contrast Adjustment
Mathematical Formula: Output(x,y) = contrast_factor × Input(x,y)
Example:
  • Original pixel: 100
  • High contrast (×1.5): 100 × 1.5 = 150
  • Low contrast (×0.5): 100 × 0.5 = 50
3. Image Addition:
Formula: Result(x,y) = Image1(x,y) + Image2(x,y)
Use case: Combining multiple exposures, noise addition
4. Image Subtraction:
Formula: Result(x,y) = |Image1(x,y) - Image2(x,y)|
Use case: Background subtraction, change detection
Code Example:
# Brightness adjustment (widen the type first: adding 50 to a uint8
# array would wrap around at 255 before clipping could help)
bright_image = original_image.astype(np.int16) + 50
bright_image = np.clip(bright_image, 0, 255).astype(np.uint8)
Filtering Operations - Neighborhood Processing
When Pixels Need to Talk to Their Neighbors
What is Image Filtering?
Unlike pixel-wise operations, filtering considers the neighborhood around each pixel to compute the output.
The Convolution Process:
  1. Place a small matrix (kernel/filter) over a pixel
  2. Multiply corresponding values
  3. Sum all products
  4. Place result at center pixel location
  5. Slide kernel to next pixel and repeat
Simple 3×3 Blur Filter Example:
Blur Kernel (all values sum to 1):
[1/9 1/9 1/9]
[1/9 1/9 1/9]
[1/9 1/9 1/9]
Original patch:
[100 150 200]
[110 160 210]
[120 170 220]
Calculation:
(100×1/9 + 150×1/9 + 200×1/9 + 110×1/9 + 160×1/9 + 210×1/9 + 120×1/9 + 170×1/9 + 220×1/9) = 1440/9 = 160
Result: Center pixel becomes 160 (average of neighborhood)
Why Filtering Works:
  • Blurring: averages neighboring pixels
  • Sharpening: enhances differences between neighbors
  • Edge Detection: finds rapid intensity changes
Edge Detection - Finding Boundaries
Discovering Where Objects Begin and End
What are Edges?
Edges are locations where pixel intensity changes rapidly - they often correspond to object boundaries.
The Sobel Edge Detector:
Uses two kernels to detect horizontal and vertical edges:
Sobel X (Vertical Edges):
[-1  0  1]
[-2  0  2]
[-1  0  1]
Sobel Y (Horizontal Edges):
[-1 -2 -1]
[ 0  0  0]
[ 1  2  1]
Mathematical Process:
Apply Sobel X kernel → Gx (gradient in x-direction)
Apply Sobel Y kernel → Gy (gradient in y-direction)
Combine: Edge Magnitude = √(Gx² + Gy²)
Edge Direction = arctan(Gy/Gx)
Real Example:
Original image patch (boundary between dark and light):
[ 50  50  50]
[ 50  50  50]
[200 200 200]
[200 200 200]
→ After Sobel filtering → [Strong edge detected]
Applications:
  • Object boundary detection
  • Shape analysis
  • Preprocessing for higher-level vision tasks
  • Medical image analysis
Image Histograms - Understanding Pixel Distributions
The Statistical Portrait of an Image
What is an Image Histogram?
A graph showing how many pixels have each intensity value in the image.
Mathematical Definition:
Histogram: H(k) = number of pixels with intensity k
where k ranges from 0 to 255 for 8-bit images
Interpreting Histograms:
  • Dark Image: histogram concentrated on the left (low values); many pixels with values 0-50, few with 200-255
  • Bright Image: histogram concentrated on the right (high values); many pixels with values 200-255, few with 0-50
  • High Contrast Image: peaks at both ends; clear separation between dark and light regions
  • Low Contrast Image: concentrated in the middle range; most pixels have similar, medium intensity values
Practical Applications:
  1. Exposure Assessment: over/under-exposed photos
  2. Image Enhancement: histogram equalization
  3. Thresholding: automatic selection of cutoff values
  4. Quality Control: detecting imaging problems
Histogram Equalization:
Goal: Spread pixel intensities across full range [0, 255]
Result: Enhanced contrast and better visibility
Image Transformations - Geometric Operations
Moving, Rotating, and Scaling Images
Translation (Moving)
Mathematical Formula:
x' = x + dx
y' = y + dy
where (dx, dy) is the movement vector
Rotation
Mathematical Formula:
x' = x×cos(θ) - y×sin(θ)
y' = x×sin(θ) + y×cos(θ)
where θ is the rotation angle
Scaling
Mathematical Formula:
x' = sx × x
y' = sy × y
where sx, sy are scaling factors
Matrix Representation:
Homogeneous coordinates allow us to represent all transformations as matrix multiplications:
Translation Matrix:
[1 0 dx]
[0 1 dy]
[0 0  1]
Rotation Matrix:
[cos(θ) -sin(θ) 0]
[sin(θ)  cos(θ) 0]
[  0        0   1]
Scaling Matrix:
[sx  0  0]
[ 0 sy  0]
[ 0  0  1]
Practical Considerations:
  • Interpolation: What to do when new coordinates fall between pixels?
  • Boundary conditions: How to handle pixels that transform outside image bounds?
  • Quality vs Speed: Different interpolation methods (nearest neighbor, bilinear, bicubic)
Image Interpolation - Filling in the Gaps
When Pixels Don't Line Up Perfectly
The Problem:
After rotation or scaling, new pixel locations may fall between original pixel positions.
Interpolation Methods:
1. Nearest Neighbor
Rule: Use the value of the closest original pixel
Pros: Fast, preserves original values
Cons: Blocky results, aliasing artifacts
Example: Need value at