import { Project } from '../types';

export const projects: Project[] = [
    // Keyhole Projects
    {
        id: 'keyhole-notification-engine',
        title: 'Smart Notification System',
        description: 'BERT based smart notification system',
        image: 'https://via.placeholder.com/400x200',
        areas: ['engineering', 'data'],
        tags: ['BERT', 'PyTorch', 'FastAPI', 'AWS Lambda'],
        employer: {
            logo: 'keyhole.png',
            url: 'https://keyhole.co',
            name: 'Keyhole'
        },
        details: {
            overview: 'Developed an intelligent notification system using state-of-the-art NLP models to deliver personalized, context-aware notifications to users.',
            richText: `
                <h3>Key Features</h3>
                <ul>
                    <li>BERT-based content analysis for personalized notification timing</li>
                    <li>Real-time user engagement scoring</li>
                    <li>Scalable serverless architecture on AWS</li>
                </ul>
            `,
            challenges: [
                'Processing large volumes of user interaction and tweet data in real-time.',
                'Maintaining model performance while keeping latency low.',
                'Balancing notification frequency with user engagement',
            ],
            solutions: [
                'Implemented efficient BERT model deployment using PyTorch',
                'Built serverless architecture using AWS Lambda for cost-effective scaling',
                'Created FastAPI endpoints for real-time model inference',
                'Developed automated monitoring and performance optimization pipeline'
            ],
            impact: [
                '10% increase in overall user engagement',
                'Reduced notification send volume by 30% while maintaining engagement',
                'Improved notification click-through rate by 25%'
            ],
            technologies: [
                {
                    category: 'ML/AI',
                    items: ['PyTorch', 'BERT', 'Transformers', 'scikit-learn']
                },
                {
                    category: 'Backend',
                    items: ['FastAPI', 'Python', 'Redis', 'PostgreSQL']
                },
                {
                    category: 'Infrastructure',
                    items: ['AWS Lambda', 'Docker', 'CloudWatch', 'S3']
                }
            ],
            timeline: {
                start: 'January 2024',
                end: 'July 2024'
            }
        }
    },
    {
        // Portfolio entry: Keyhole pricing & packaging analysis.
        id: 'keyhole-pricing-analysis',
        title: 'Pricing & Packaging Analysis',
        description: 'Analyses and experiments to optimize pricing and packaging',
        image: 'https://via.placeholder.com/400x200',
        areas: ['data'],
        tags: ['Python', 'SQL', 'Looker', 'A/B Testing'],
        employer: {
            logo: 'keyhole.png',
            url: 'https://keyhole.co',
            name: 'Keyhole'
        },
        details: {
            overview: 'Led an end-to-end pricing optimization initiative combining data analysis, statistical modeling, and business intelligence to revamp product pricing strategy.',
            // Raw HTML fragment; whitespace inside the template literal is part of the value.
            richText: `
                <h3>Key Components</h3>
                <ul>
                    <li>Statistical analysis of user cohorts, purchasing patterns and long term retention.</li>
                    <li>Price elasticity modeling and sensitivity testing</li>
                    <li>A/B testing framework for pricing variations</li>
                </ul>
            `,
            challenges: [
                'Analyzing complex user behavior patterns across different market segments',
                'Variability in user cohorts and purchasing patterns',
                'Implementing pricing changes without disrupting existing customer base',
            ],
            solutions: [
                'Developed Python-based analysis pipeline for user behavior and pricing patterns',
                'Implemented sophisticated A/B testing framework for pricing experiments',
                'Created automated reporting system using SQL and Looker',
                'Built real-time monitoring dashboard for pricing impact metrics'
            ],
            impact: [
                '15% increase in new subscription conversion rate',
                '5% overall revenue growth',
                'Improved customer retention through optimized pricing tiers and individual product pricing.',
                'Established data-driven framework for future pricing decisions.'
            ],
            technologies: [
                {
                    category: 'Analysis',
                    items: ['Python', 'Pandas', 'NumPy', 'SciPy', 'Statsmodels']
                },
                {
                    category: 'Database & BI',
                    items: ['SQL', 'Looker', 'Google Analytics']
                },
            ],
        }
    },

    // Hubpay Projects
    {
        id: 'hubpay-pricing-engine',
        title: 'Dynamic Pricing Engine',
        description: 'Backend service to manage all prices and promos',
        image: 'https://via.placeholder.com/400x200',
        areas: ['engineering', 'data'],
        tags: ['FastAPI', 'Postgres', 'LookML', 'Redis', 'AWS', 'React'],
        employer: {
            logo: 'hubpay.png',
            url: 'https://hubpay.ae',
            name: 'Hubpay'
        },
        details: {
            overview: 'I worked on designing and then leading a team to implement a comprehensive pricing engine responsible for all prices and promos. We developed a RESTful API with FastAPI and a React frontend for parameter manipulation and outcome visualization. We also created Looker dashboards for stakeholders to monitor performance and make data-driven decisions.',
            richText: `
            <h3>Key Features</h3>
            <ul>
                <li>RESTful API built with FastAPI exposing pricing engine functionality</li>
                <li>Interactive React/TypeScript frontend for parameter manipulation</li>
                <li>Real-time outcome visualization dashboard</li>
                <li>Advanced A/B testing capabilities</li>
                <li>Interactive Looker dashboards for stakeholder insights</li>
            </ul>
        `,
            challenges: [
                'Ensuring competitive pricing against competitors.',
                'Safety fallbacks to prevent accidental price changes and losses.',
                'Maintaining system performance with real-time visualizations',
                'Integrating complex statistical models with user-friendly controls'
            ],
            solutions: [
                'Implemented FastAPI for high-performance backend services',
                'Developed React/TypeScript frontend with Material-UI components',
                'Created SQL and LookML-based interactive dashboards',
                'Integrated statistical methods for A/B testing and validation',
                'Built real-time visualization components for immediate feedback'
            ],
            impact: [
                'Enabled data-driven pricing decisions for business teams',
                'Significantly enhanced user engagement and conversion rates',
                'Streamlined pricing strategy implementation process',
                'Improved stakeholder visibility through clear metrics and insights'
            ],
            technologies: [
                {
                    category: 'Backend',
                    items: ['Python', 'FastAPI', 'scikit-learn', 'PostgreSQL', 'Redis']
                },
                {
                    category: 'Frontend',
                    items: ['React', 'TypeScript', 'Material-UI']
                },
                {
                    category: 'Data & Analytics',
                    items: ['LookML', 'Looker', 'A/B Testing Frameworks']
                },
            ],
        },
        action: {
            type: 'link',
            label: 'View System',
            url: 'https://hubpay.ae/platform/pricing'
        }
    },
    {
        // Portfolio entry: Hubpay lead-generation scraping pipeline.
        id: 'hubpay-scraper',
        title: 'Lead Generation Pipeline',
        description: 'RAG pipeline to extract quality leads',
        image: 'https://via.placeholder.com/400x200',
        areas: ['engineering', 'data'],
        tags: ['Python', 'LLM', 'Web Scraping', 'NLP', 'Data Processing', 'AWS EC2', 'AWS S3', 'Docker'],
        employer: {
            logo: 'hubpay.png',
            url: 'https://hubpay.ae',
            name: 'Hubpay'
        },
        details: {
            overview: 'Wrote a smart scraping script that extracted over 500k companies and their contact information from different sources in the UAE by querying google and RAG/LLM models and NER to understand the content of resulting pages. These were then parsed through a series of custom scripts to extract the required fields and store them in a database. Quality of the leads was further determined using a combination of ML models, automated calls, and financial indicators using past data.',
            // Raw HTML fragment; whitespace inside the template literal is part of the value.
            richText: `
            <h3>Key Features</h3>
            <ul>
                <li>Intelligent Google search querying system for company discovery</li>
                <li>High-performance web scraping infrastructure on AWS EC2</li>
                <li>Fine-tuned LLM models for contextual data extraction</li>
                <li>Automated document processing pipeline</li>
                <li>Scalable data storage and retrieval system on AWS S3</li>
            </ul>
        `,
            challenges: [
                'Processing and extracting data from diverse and unstructured web sources',
                'Handling large-scale data extraction of 500k+ companies',
                'Processing an average of 20 documents per company efficiently',
                'Handling out-of-date data and ensuring quality',
                'Ensuring high accuracy in automated data extraction',
                'Managing computational resources for LLM processing at scale'
            ],
            solutions: [
                'Implemented distributed scraping architecture on AWS EC2 for parallel processing',
                'Used RAG/LLM models and NER for accurate information extraction from varied document formats',
                'Created efficient data pipeline using AWS S3 for scalable storage',
                'Built robust error handling and retry mechanisms for reliable data collection',
                'Implemented rate limiting and proxying for sustainable web scraping',
                'Created custom ML models to determine quality of leads and predict revenue',
            ],
            impact: [
                'Successfully extracted data from 500,000+ UAE companies',
                'Processed an average of 20 documents per company',
                'Achieved high accuracy in automated data extraction using fine-tuned LLMs',
                // TODO: 'XX' and '$XX' below are unfilled placeholders — replace them with
                // real figures (or drop this bullet) before this entry is published.
                'Created XX quality leads and $XX revenue from these companies.',
                'Enabled data-driven decision making for business teams'
            ],
            technologies: [
                {
                    category: 'Core Technologies',
                    items: ['Python', 'BeautifulSoup', 'Selenium', 'LLM Models']
                },
                {
                    category: 'Cloud Infrastructure',
                    items: ['AWS EC2', 'AWS S3', 'Docker']
                },
                {
                    category: 'Data Processing',
                    items: ['Pandas', 'NumPy', 'NLP Libraries']
                },
            ],
        },
        // Site-relative URL, unlike the absolute employer URL above.
        action: {
            type: 'link',
            label: 'View Documentation',
            url: '/projects/hubpay-scraper'
        }
    },
    {
        id: 'hubpay-dashboards',
        title: 'Interactive Business Analytics',
        description: 'Dashboards to monitor business and user metrics.',
        image: 'https://via.placeholder.com/400x200',
        areas: ['data'],
        tags: ['SQL', 'Looker', 'Data Visualization'],
        employer: {
            logo: 'hubpay.png',
            url: 'https://hubpay.ae',
            name: 'Hubpay'
        },
        details: {
            overview: 'Developed comprehensive business intelligence dashboards using Looker to provide stakeholders with real-time insights into key business metrics, focusing on pricing optimization, user behavior, and business performance.',
            richText: `
            <h3>Key Features</h3>
            <ul>
                <li>Real-time pricing performance monitoring and analysis</li>
                <li>Interactive cohort analysis for user behavior tracking</li>
                <li>A/B testing results visualization and analysis</li>
                <li>Automated KPI tracking and reporting</li>
                <li>Aggregated data from multiple sources into a single view</li>
            </ul>
        `,
            challenges: [
                'High volume of data and need for real-time insights',
                'Ensuring dashboard performance with large datasets',
                'Aggregating data from multiple sources',
                'Ensuring dashboard performance with large datasets',
                'Ensuring low cost and high performance'
            ],
            solutions: [
                'Used Looker to create interactive dashboards',
                'Created custom LookML models for complex metrics',
                'Aggregated data from multiple sources into a single view',
                'Ensured low cost and high performance using AWS Redshift',
            ],
            technologies: [
                {
                    category: 'Data Analysis',
                    items: ['SQL', 'LookML', 'Looker', 'BigQuery']
                },
                {
                    category: 'Infrastructure',
                    items: ['AWS', 'Redshift', 'Data Warehousing', 'ETL Pipelines']
                }
            ],
        },
    },
    {
        id: 'hubpay-cohort-analysis',
        title: 'Customer Retention Analytics & ML Pipeline',
        description: 'End to End pipeline using behavioral analysis, automated intervention',
        image: 'https://via.placeholder.com/400x200',
        areas: ['data'],
        tags: ['Pandas', 'Scikit-learn', 'PyTorch', 'MLflow'],
        employer: {
            logo: 'hubpay.png',
            url: 'https://hubpay.ae',
            name: 'Hubpay'
        },
        details: {
            overview: 'I developed and deployed an end-to-end machine learning pipeline for user behavior analysis and churn prediction, incorporating automated retraining, monitoring, and intervention systems.',
            richText: `
            <h3>Machine Learning Components</h3>
            <ul>
                <li>Survival analysis models for churn prediction using time-series behavioral data</li>
                <li>Automated feature engineering pipeline for user behavior metrics</li>
                <li>Multi-arm bandit implementation for dynamic pricing optimization</li>
                <li>Ensemble models combining transactional and behavioral signals</li>
                <li>Real-time scoring system for user engagement prediction</li>
            </ul>

            <h3>Model Architecture</h3>
            <ul>
                <li>Gradient Boosting models (XGBoost) for churn prediction</li>
                <li>Deep learning models (PyTorch) for user behavior sequence analysis</li>
                <li>Thompson Sampling for pricing optimization</li>
                <li>Random Forest models for feature importance analysis</li>
                <li>LSTM networks for time-series pattern recognition</li>
            </ul>
        `,
            challenges: [
                'Handling concept drift in user behavior patterns during market changes',
                'Managing class imbalance in churn prediction models',
                'Building robust feature engineering pipelines for real-time scoring',
                'Dealing with sparse data in new user cohorts',
                'Maintaining model performance across different user segments',
                'Implementing effective model monitoring and retraining strategies',
                'Balancing model complexity with inference time requirements',
                'Managing cold-start problems for new users'
            ],
            solutions: [
                'Implemented sliding window retraining system with MLflow for model versioning',
                'Developed custom loss functions for handling imbalanced classes',
                'Created automated feature selection pipeline based on statistical stability',
                'Built hierarchical models combining segment-specific and global patterns',
                'Implemented A/B testing framework for model deployment',
                'Designed real-time feature computation system with Redis caching',
                'Created comprehensive model monitoring dashboard with custom metrics',
                'Developed fallback heuristics for cold-start scenarios'
            ],
            impact: [
                'Improved churn prediction accuracy by 25% over baseline models',
                'Reduced false positive rate in high-risk user identification by 30%',
                'Automated 90% of feature engineering pipeline',
                'Decreased model inference time by 60% through optimization',
                'Enabled real-time scoring for all active users',
                'Increased retention rate in targeted segments by 15%'
            ],
            technologies: [
                {
                    category: 'Machine Learning',
                    items: ['PyTorch', 'Scikit-learn', 'XGBoost', 'TensorFlow', 'MLflow']
                },
                {
                    category: 'Feature Engineering',
                    items: ['Feature-tools', 'Tsfresh', 'Custom Feature Pipeline']
                },
                {
                    category: 'Model Monitoring',
                    items: ['Prometheus', 'Grafana', 'Custom Metrics Dashboard']
                },
                {
                    category: 'Data Processing',
                    items: ['Apache Spark', 'Redis', 'PostgreSQL']
                },
                {
                    category: 'ML Infrastructure',
                    items: ['Kubernetes', 'Docker', 'FastAPI', 'MLflow']
                }
            ],
        },
    },

    // Combyne Projects
    {
        id: 'combyne-churn-prediction',
        title: 'ML-Powered Churn Prediction System',
        description: 'Behavioral analytics, predictive modeling to reduce customer churn',
        image: 'https://via.placeholder.com/400x200',
        areas: ['data'],
        tags: ['Python', 'BigQuery', 'K-Means', 'MLOps'],
        employer: {
            logo: 'combyne.png',
            url: 'https://combyne.com',
            name: 'Combyne'
        },
        details: {
            overview: 'Designed and implemented an end-to-end machine learning pipeline for churn prediction, combining user behavioral analysis, predictive modeling, and automated intervention systems.',
            richText: `
            <h3>Machine Learning Architecture</h3>
            <ul>
                <li>Ensemble of predictive models combining behavioral and engagement signals</li>
                <li>Custom feature engineering pipeline for user activity patterns</li>
                <li>Automated model retraining system with performance monitoring</li>
                <li>Real-time scoring engine for immediate risk assessment</li>
                <li>Integration with notification and intervention systems</li>
            </ul>

            <h3>Model Components</h3>
            <ul>
                <li>Gradient Boosting Classifiers for churn prediction</li>
                <li>K-Means Clustering for user segmentation</li>
                <li>Time-series analysis for engagement patterns</li>
                <li>Feature importance analysis using SHAP values</li>
                <li>Custom metrics for model evaluation</li>
            </ul>
        `,
            challenges: [
                'Handling class imbalance in churn prediction',
                'Feature engineering from complex user interaction patterns',
                'Managing data quality across different user segments',
                'Implementing efficient BigQuery optimization for large-scale data processing',
                'Dealing with delayed labels in churn prediction',
                'Balancing model complexity with query cost optimization',
                'Real-time prediction serving while maintaining performance'
            ],
            solutions: [
                'Implemented weighted sampling and custom loss functions for imbalanced classes',
                'Developed automated feature selection based on statistical significance',
                'Created efficient BigQuery stored procedures for feature computation',
                'Built sliding window prediction system for early warning detection',
                'Designed efficient caching system for frequently accessed features',
                'Optimized query costs through materialized views and partitioning',
                'Implemented feature store for real-time and batch predictions'
            ],
            impact: [
                'Achieved 24% reduction in user churn over 6-month period',
                'Reduced data processing costs by 80% through query optimization',
                'Improved prediction accuracy by 35% compared to baseline',
                'Enabled real-time churn risk scoring for all active users',
                'Successfully identified 78% of potential churners before departure'
            ],
            technologies: [
                {
                    category: 'Machine Learning',
                    items: ['scikit-learn', 'XGBoost', 'K-Means', 'SHAP', 'Feature Selection Tools']
                },
                {
                    category: 'Data Processing',
                    items: ['BigQuery', 'SQL', 'Stored Procedures', 'Data Warehousing']
                },
                {
                    category: 'MLOps',
                    items: ['Model Monitoring', 'Automated Retraining', 'Feature Store']
                },
                {
                    category: 'Infrastructure',
                    items: ['Google Cloud Platform', 'Kubernetes', 'Docker']
                }
            ],
        }
    },
    {
        id: 'combyne-feed-personalization',
        title: 'AI-Powered Feed Personalization Engine',
        description: 'Using collaborative filtering, clustering, and gen-AI for personalization.',
        image: 'https://via.placeholder.com/400x200',
        areas: ['engineering', 'data'],
        tags: ['K-Means', 'Collaborative Filtering', 'GAN', 'BigQuery'],
        employer: {
            logo: 'combyne.png',
            url: 'https://combyne.com',
            name: 'Combyne'
        },
        details: {
            overview: 'Built a sophisticated recommendation engine combining multiple ML approaches: collaborative filtering for personalization, clustering for user segmentation, and GANs for creative content generation.',
            richText: `
            <h3>Machine Learning Components</h3>
            <ul>
                <li>Matrix Factorization-based Collaborative Filtering system</li>
                <li>Deep learning-based user embedding generation</li>
                <li>Style-aware GANs for outfit generation</li>
                <li>Real-time content scoring and ranking system</li>
                <li>Multi-modal feature extraction pipeline</li>
            </ul>

            <h3>Technical Architecture</h3>
            <ul>
                <li>Custom GAN architecture for fashion-aware generation</li>
                <li>Hybrid recommendation system combining content-based and collaborative approaches</li>
                <li>Real-time feature computation and scoring pipeline</li>
                <li>A/B testing framework for recommendation strategies</li>
                <li>Performance monitoring and automated model updates</li>
            </ul>
        `,
            challenges: [
                'Building scalable real-time recommendation system',
                'Training stable GANs for fashion item generation',
                'Handling cold-start problems for new users',
                'Balancing exploration vs exploitation in recommendations',
                'Managing computational costs of real-time scoring',
                'Ensuring diversity in recommendations',
                'Dealing with sparse user-item interaction matrix'
            ],
            solutions: [
                'Implemented efficient matrix factorization with side information',
                'Developed custom GAN architecture with style-conditioning',
                'Created hybrid recommendation strategy combining multiple signals',
                'Built content-based fallback system for cold-start cases',
                'Implemented Thompson Sampling for exploration-exploitation trade-off',
                'Designed efficient caching strategy for recommendation scores',
                'Used dimensionality reduction techniques for sparse data'
            ],
            impact: [
                'Increased daily active user engagement by 18%',
                'Achieved 80% cost reduction in BigQuery processing',
                'Improved recommendation relevance by 45%',
                'Successfully deployed GAN-based outfit generation to production',
                'Reduced feed loading latency by 60%',
                'Increased average session duration by 120%'
            ],
            technologies: [
                {
                    category: 'Deep Learning',
                    items: ['PyTorch', 'GANs', 'Neural Collaborative Filtering', 'Embeddings']
                },
                {
                    category: 'Machine Learning',
                    items: ['scikit-learn', 'K-Means', 'Matrix Factorization', 'Content-Based Filtering']
                },
                {
                    category: 'Infrastructure',
                    items: ['Google Cloud Platform', 'BigQuery', 'Kubernetes', 'Redis']
                },
                {
                    category: 'Monitoring & Testing',
                    items: ['A/B Testing Framework', 'Performance Monitoring', 'Model Validation']
                }
            ],
        }
    },
    {
        id: 'combyne-image-processing',
        title: 'Image Processing Suite',
        description: 'Image processing pipeline for product categorization and background removal',
        image: 'https://via.placeholder.com/400x200',
        areas: ['engineering', 'data'],
        tags: ['Computer Vision', 'SVM', 'CNN', 'PyTorch', 'Kubernetes',],
        employer: {
            logo: 'combyne.png',
            url: 'https://combyne.com',
            name: 'Combyne'
        },
        details: {
            overview: 'Developed and deployed a comprehensive image processing system for Combyne\'s extensive item library, combining multiple machine learning models to enhance product categorization and presentation.',
            richText: `
            <h3>Key Features</h3>
            <ul>
                <li>Automated background removal system using CNN architecture</li>
                <li>SVM-based categorization system for efficient item library management</li>
                <li>GAN-powered outfit generation system</li>
                <li>Scalable API infrastructure on Google Cloud Platform</li>
                <li>Containerized deployment with Kubernetes orchestration</li>
            </ul>
        `,
            challenges: [
                'Processing large volumes of diverse product images efficiently',
                'Maintaining high accuracy in automated categorization',
                'Achieving clean background removal across varied image types',
                'Scaling the system to handle growing product catalog',
                'Ensuring low latency for real-time processing'
            ],
            solutions: [
                'Implemented custom CNN architecture optimized for background removal',
                'Developed SVM models for multi-category classification',
                'Deployed the system on GCP using Kubernetes for scalability',
                'Created a microservices architecture for independent scaling of components',
                'Utilized Docker containers for consistent deployment across environments'
            ],
            impact: [
                '15% increase in net promoter score',
                'Significant reduction in manual categorization effort',
                'Enhanced user experience through clean product presentations',
                'Improved system scalability and reliability',
                'Enabled new features like automated outfit generation'
            ],
            technologies: [
                {
                    category: 'Machine Learning',
                    items: ['PyTorch', 'CNN', 'SVM', 'GANs', 'Computer Vision']
                },
                {
                    category: 'Infrastructure',
                    items: ['Google Cloud Platform', 'Docker', 'Kubernetes']
                },
                {
                    category: 'Development',
                    items: ['Python', 'FastAPI', 'Git']
                },
                {
                    category: 'Monitoring & Analytics',
                    items: ['Prometheus', 'Grafana', 'CloudWatch']
                }
            ],
        },
        action: {
            type: 'link',
            label: 'View Feature',
            url: 'https://combyne.com/features/image-processing'
        }
    },
    // Combyne project entry: an outfit-completion feature that suggests complementary
    // fashion items using K-means clustering and user activity data.
    // This entry defines no `timeline`, `links`, or top-level `action`.
    // NOTE(review): presumably those fields are optional on `Project` — confirm in ../types.
    {
        id: 'combyne-outfit-generator',
        title: 'Fashion Item Matching System',
        description: 'Smart outfit generator using clustering and user activity data',
        // Generic placeholder image URL, not project-specific artwork.
        image: 'https://via.placeholder.com/400x200',
        areas: ['engineering', 'data'],
        tags: ['Python', 'K-Means Clustering', 'Docker', 'GCP'],
        employer: {
            logo: 'combyne.png',
            url: 'https://combyne.com',
            name: 'Combyne'
        },
        details: {
            overview: 'Created a fashion item matching system that suggests complementary items (like pants, shoes, accessories) when a user selects a primary item (like a shirt). The system uses clustering and user activity data to make relevant suggestions.',
            // HTML fragment kept as a raw string; whitespace inside the template literal is
            // part of the value. NOTE(review): presumably injected as markup by the detail
            // view — confirm how it is rendered before placing untrusted content here.
            richText: `
            <h3>Key Features</h3>
            <ul>
                <li>Item category clustering based on user interaction patterns</li>
                <li>Basic outfit completion suggestions</li>
                <li>Integration with existing item database</li>
                <li>User preference consideration</li>
            </ul>
        `,
            challenges: [
                'Categorizing items effectively from the existing database',
                'Processing user activity data to understand preferences',
                'Ensuring suggestions load quickly in the app',
                'Matching items across different categories (tops, bottoms, accessories)'
            ],
            solutions: [
                'Implemented K-means clustering to group similar fashion items',
                'Used historical user interaction data to inform suggestions',
                'Optimized database queries for quick item retrieval',
                'Created category-based matching logic for outfit completion'
            ],
            impact: [
                'Successfully implemented in production environment',
                'Improved outfit suggestion relevance',
                'Contributed to overall user engagement improvements',
                'Reduced manual effort in outfit creation'
            ],
            // Technologies grouped by category; each group is a label plus a list of names.
            technologies: [
                {
                    category: 'ML/Data',
                    items: ['Python', 'K-Means Clustering', 'NumPy', 'Pandas']
                },
                {
                    category: 'Infrastructure',
                    items: ['Docker', 'Google Cloud Platform']
                },
                {
                    category: 'Database',
                    items: ['PostgreSQL', 'Query Optimization']
                }
            ]
        }
    },

    // NYU Research Projects
    // NYU research entry: electoral analysis covering form irregularity detection,
    // satellite-imagery voter turnout prediction, and observer report processing.
    {
        id: 'nyu-electoral-analysis',
        title: 'Electoral Analyses & Form Irregularity Detection',
        description: 'Understanding inequality, irregularities, voter turnout using ML, CV, NLP',
        // Generic placeholder image URL, not project-specific artwork.
        image: 'https://via.placeholder.com/400x200',
        areas: ['research', 'data'],
        tags: ['Computer Vision', 'NLP', 'CNN'],
        employer: {
            logo: 'NYU.png',
            url: 'https://nyuad.nyu.edu/en/research.html',
            name: 'New York University'
        },
        details: {
            overview: 'Conducted comprehensive electoral analysis research under Dr. Andy Harris, focusing on two main areas: detecting irregularities in election forms using deep neural networks and predicting voter turnout through satellite imagery analysis. The research contributed to understanding spatial inequality in elections and developing automated verification systems.',
            // HTML fragment kept as a raw string; whitespace and blank lines inside the
            // template literal are part of the value. NOTE(review): presumably injected as
            // markup by the detail view — confirm how it is rendered.
            richText: `
            <h3>Research Components</h3>
            <ul>
                <li>Form Analysis: Deep neural networks for detecting irregularities in election forms</li>
                <li>Spatial Analysis: CNN-based voter turnout prediction from satellite imagery</li>
                <li>Observer Report Analysis: Advanced NLP for processing election observer reports</li>
                <li>Integration: Combined multiple data sources for comprehensive electoral analysis</li>
            </ul>

            <h3>Technical Implementation</h3>
            <ul>
                <li>Developed convolutional neural networks for document analysis</li>
                <li>Implemented automated form irregularity detection system</li>
                <li>Created satellite image analysis pipeline with triplet loss architecture</li>
                <li>Built NLP system for observer report processing using BERT and Bi-LSTM-CRF</li>
            </ul>

            <h3>Form Analysis Methodology</h3>
            <ul>
                <li>Analyzed approximately 30,000 polling station forms from 2013 Kenyan presidential election using deep neural networks for document classification</li>
                <li>Implemented comprehensive irregularity detection system beyond simple vote tally editing</li>
                <li>Developed validation framework using cross-checks with observer reports and geographic distribution analysis</li>
                <li>Created automated pattern recognition system for form elements and signatures</li>
            </ul>

            <h3>Satellite Analysis Methodology</h3>
            <ul>
                <li>Designed custom CNN architecture with triplet loss for processing diverse geographic imagery</li>
                <li>Implemented spatial correlation analysis to understand geographic patterns in voter turnout</li>
                <li>Created feature extraction pipeline specifically for satellite imagery characteristics</li>
                <li>Developed validation system using ground truth data from multiple regions</li>
            </ul>
        `,
            challenges: [
                'Processing and analyzing approximately 30,000 polling station forms',
                'Developing reliable irregularity detection algorithms',
                'Handling diverse satellite imagery from different regions',
                'Integrating multiple data sources (forms, satellite images, observer reports)',
                'Ensuring model accuracy across different cultural and geographic contexts'
            ],
            solutions: [
                'Implemented deep neural networks for automated form analysis',
                'Developed catalog of irregularity types beyond simple vote tally editing',
                'Created custom CNN architecture with triplet loss for satellite imagery',
                'Built comprehensive validation framework for cross-checking results',
                'Designed scalable processing pipeline for multiple data types'
            ],
            impact: [
                'Successfully analyzed forms from ~30,000 polling stations',
                'Achieved 87% accuracy in voter turnout prediction',
                'Developed novel methodology for form irregularity detection',
                'Generated insights into relationship between observers and irregularities',
                'Contributed to understanding of election integrity in developing democracies'
            ],
            // Technologies grouped by category; each group is a label plus a list of names.
            technologies: [
                {
                    category: 'Computer Vision',
                    items: ['Deep Neural Networks', 'Convolutional Neural Networks', 'Document Analysis', 'Satellite Image Processing']
                },
                {
                    category: 'Machine Learning',
                    items: ['Triplet Loss Architecture', 'Transfer Learning', 'Custom Model Architectures', 'Validation Frameworks']
                },
                {
                    category: 'NLP',
                    items: ['BERT', 'Bi-LSTM-CRF', 'Named Entity Recognition', 'VADER Sentiment Analysis']
                }
            ],
            // Start and end are month-and-year strings, not date objects.
            timeline: {
                start: 'September 2021',
                end: 'August 2022'
            },
            links: [
                {
                    type: 'article',
                    url: 'https://doi.org/10.1016/j.electstud.2021.102411',
                    label: 'Electoral Studies Paper'
                }
            ]
        },
        // Top-level call-to-action; its URL is the same DOI as details.links[0], so the
        // two must be kept in sync when either changes.
        action: {
            type: 'link',
            label: 'See Article',
            url: 'https://doi.org/10.1016/j.electstud.2021.102411'
        }
    },
    // NYU engineering entry: a full-stack (MongoDB/Express/React/Node.js) web app for
    // coordinating large-scale data entry.
    // This entry defines no `details.links` and no top-level `action`.
    // NOTE(review): presumably those fields are optional on `Project` — confirm in ../types.
    {
        id: 'nyu-data-entry-platform',
        title: 'Large-Scale Data Entry Platform',
        description: 'full stack app for large scale data entry',
        // Generic placeholder image URL, not project-specific artwork.
        image: 'https://via.placeholder.com/400x200',
        areas: ['engineering', 'data'],
        tags: ['MongoDB', 'Express', 'React', 'Node.js'],
        employer: {
            logo: 'NYU.png',
            url: 'https://nyuad.nyu.edu/en/research/faculty-labs-and-projects/communication-networks-lab.html',
            name: 'New York University'
        },
        details: {
            overview: 'A full-stack web application built to coordinate and manage data entry operations for NYU research projects, focusing on efficiency and data accuracy.',
            // HTML fragment kept as a raw string; whitespace inside the template literal is
            // part of the value. NOTE(review): presumably injected as markup by the detail
            // view — confirm how it is rendered.
            richText: `
            <h3>Key Features</h3>
            <ul>
                <li>Simple web interface for data entry with built-in validation</li>
                <li>Task management dashboard for coordinating team workload</li>
                <li>Progress tracking and reporting system</li>
            </ul>
        `,
            challenges: [
                'Coordinating tasks across a large remote team',
                'Ensuring data quality and consistency',
                'Managing high volume of concurrent users'
            ],
            solutions: [
                'Built intuitive interface with real-time validation',
                'Implemented MongoDB for efficient data handling',
                'Created admin dashboard for work distribution'
            ],
            impact: [
                'Successfully processed 2M+ records',
                'Coordinated 36 remote workers efficiently',
                'Reduced data entry errors by validation'
            ],
            // A single technology group covering the whole stack.
            technologies: [
                {
                    category: 'Stack',
                    items: ['Node.js', 'MongoDB', 'Express', 'React', 'HTML/CSS/JavaScript']
                }
            ],
            // Start and end are month-and-year strings, not date objects.
            timeline: {
                start: 'September 2021',
                end: 'August 2022'
            }
        }
    },
    // NYU research entry: a computer-vision tool for automated qualitative evaluation
    // of web pages, with two linked ACM publications.
    {
        id: 'nyu-web-vision',
        title: 'Web Page Evaluation Tool',
        description: 'Computer Vision tool, published at ACM UIST 2020 and ACM WebConf 2022',
        // Generic placeholder image URL, not project-specific artwork.
        image: 'https://via.placeholder.com/400x200',
        areas: ['research', 'engineering'],
        tags: ['Computer Vision', 'Image Processing', 'Research'],
        employer: {
            logo: 'NYU.png',
            url: 'https://nyuad.nyu.edu/en/research/faculty-labs-and-projects/communication-networks-lab.html',
            name: 'New York University'
        },
        details: {
            // The apostrophe is backslash-escaped because the string is single-quoted.
            overview: 'Worked on a computer vision-based tool developed at NYU\'s ComNets Lab for automating qualitative evaluation of web pages. The tool combines advanced computer vision algorithms like image integrals with web interaction capabilities to analyze and compare web pages systematically.',
            // HTML fragment kept as a raw string; whitespace inside the template literal is
            // part of the value. NOTE(review): presumably injected as markup by the detail
            // view — confirm how it is rendered.
            richText: `
            <h3>Key Features</h3>
            <ul>
                <li>Advanced component identification system for web page elements</li>
                <li>Complex action simulation including hover and click interactions</li>
                <li>Structural and functional similarity comparison between web pages</li>
                <li>Visual analysis using Image Integrals and hybrid text-image CNNs</li>
                <li>Canny Edge Detection integration for improved accuracy</li>
            </ul>
        `,
            challenges: [
                'Accurately identifying and categorizing diverse web page components',
                'Developing reliable metrics for comparing functional similarity between pages',
                'Integrating multiple computer vision algorithms effectively',
                'Creating a system that could handle dynamic web content',
                'Ensuring accuracy across different web page layouts and designs'
            ],
            solutions: [
                'Implemented hybrid CNN architecture combining text and image analysis',
                'Developed custom Image Integral algorithms for component detection',
                'Created innovative similarity metrics based on both visual and functional aspects',
                'Integrated Canny Edge Detection for improved boundary recognition',
                'Validated performance through extensive user studies'
            ],
            impact: [
                'Published research at two prestigious conferences: ACM UIST 2020 and ACM WebConf 2022',
                'Contributed significant advancements to the field of Web Simplification',
                'Created a foundation for automated web page evaluation',
                'Developed methodology validated through comprehensive user studies'
            ],
            // Technologies grouped by category; each group is a label plus a list of names.
            technologies: [
                {
                    category: 'Computer Vision',
                    items: ['Image Integrals', 'Canny Edge Detection', 'CNNs', 'OpenCV']
                },
                {
                    category: 'Machine Learning',
                    items: ['PyTorch', 'TensorFlow', 'Neural Networks', 'Computer Vision Algorithms']
                },
                {
                    category: 'Development',
                    items: ['Python', 'JavaScript', 'Web Technologies', 'Computer Vision Libraries']
                },
                {
                    category: 'Analysis',
                    items: ['Statistical Analysis', 'Performance Metrics', 'User Study Tools']
                }
            ],
            // Publications listed oldest first.
            links: [
                {
                    type: 'article',
                    url: 'https://dl.acm.org/doi/abs/10.1145/3379350.3416163',
                    label: 'ACM UIST 2020 Publication'
                },
                {
                    type: 'article',
                    url: 'https://dl.acm.org/doi/abs/10.1145/3485447.3512112',
                    label: 'ACM WebConf 2022 Publication'
                }
            ],
            // Start and end are month-and-year strings, not date objects.
            timeline: {
                start: 'June 2019',
                end: 'May 2021'
            }
        },
        // Top-level call-to-action; its URL is the same as the WebConf 2022 entry in
        // details.links, so the two must be kept in sync when either changes.
        action: {
            type: 'link',
            label: 'View Latest Publication',
            url: 'https://dl.acm.org/doi/abs/10.1145/3485447.3512112'
        }
    },

    // Other Projects
    // imagiLabs entry: three mini games built for the imagiLabs iOS app.
    // This entry defines no `challenges`, `solutions`, `impact`, or `timeline` under
    // `details`, and no top-level `action`.
    // NOTE(review): presumably those fields are optional on `Project` — confirm in ../types.
    {
        id: 'imagilabs-ios',
        title: 'Mini Games for iOS',
        description: 'Developed 3 mini games for imagilabs iOS app',
        // Generic placeholder image URL, not project-specific artwork.
        image: 'https://via.placeholder.com/400x200',
        areas: ['engineering'],
        tags: ['iOS', 'Swift', 'Figma'],
        employer: {
            logo: 'imagilabs.png',
            url: 'https://imagilabs.com',
            name: 'imagiLabs'
        },
        details: {
            overview: 'Led iOS application development at imagiLabs, focusing on user experience and app performance while working in an agile environment.',
            // HTML fragment kept as a raw string; whitespace inside the template literal is
            // part of the value. NOTE(review): presumably injected as markup by the detail
            // view — confirm how it is rendered.
            richText: `
            <h3>Key Achievements</h3>
            <ul>
                <li>Developed 3 mini games for imagilabs iOS app</li>
                <li>Implemented game logic, UI/UX design, and integration with existing app architecture</li>
                <li>Worked in an agile environment with daily Scrum meetings</li>
                <li>Followed Apple's design principles and coding practices</li>
            </ul>
        `,
            // Technologies grouped by category; each group is a label plus a list of names.
            technologies: [
                {
                    category: 'Development',
                    items: ['Swift', 'SwiftUI', 'iOS SDK', 'Xcode']
                },
                {
                    category: 'Design',
                    items: ['Figma', 'Apple Design Guidelines']
                },
                {
                    category: 'Project Management',
                    items: ['Agile', 'Scrum']
                }
            ],
            // The only link in this file with type 'demo'; it points at the App Store listing.
            links: [
                {
                    type: 'demo',
                    url: 'https://apps.apple.com/us/app/imagi-fun-coding-game/id1434239482',
                    label: 'App Store'
                }
            ]
        }
    },
    // Sciences Po research entry: automated resume scraping, controlled resume
    // generation, and job-application submission used to study hiring discrimination
    // in France. Two resulting papers are linked under `details.links`.
    // This entry defines no top-level `action`.
    // NOTE(review): presumably that field is optional on `Project` — confirm in ../types.
    {
        id: 'sciences-po-resume',
        title: 'Employment Application Research System',
        // Generic placeholder image URL, not project-specific artwork.
        image: 'https://via.placeholder.com/400x200',
        description: 'Automated web agents to scrape resumes and apply for jobs',
        areas: ['research', 'data'],
        tags: ['Web Scraping', 'Automation'],
        employer: {
            logo: 'sciences-po.png',
            url: 'https://sciencespo.fr',
            name: 'Sciences Po'
        },
        details: {
            // The apostrophe is backslash-escaped because the string is single-quoted.
            overview: 'Led the development of an automated research system to study employment discrimination in France. The project investigated how job application callback rates varied based on candidates\' names (suggesting different ethnic origins) and gender across different regions of France. Built end-to-end infrastructure to scrape millions of real work experiences, generate controlled resume variations, and systematically test employer responses through automated job applications.',
            // HTML fragment kept as a raw string; whitespace and blank lines inside the
            // template literal are part of the value. NOTE(review): presumably injected as
            // markup by the detail view — confirm how it is rendered.
            richText: `
            <h3>Research Methodology</h3>
            <ul>
                <li>Created identical resumes varying only in candidate names (suggesting different ethnic backgrounds) and gender</li>
                <li>Scraped and analyzed over 3 million work experiences to generate authentic, region-appropriate resume content</li>
                <li>Developed automated systems to submit controlled job applications across different French regions</li>
                <li>Built tracking systems to measure and analyze callback rates based on demographic variables</li>
            </ul>

            <h3>Technical Implementation</h3>
            <ul>
                <li>Large-scale scraping system to collect authentic work experiences and job postings</li>
                <li>Resume generation engine that created controlled variations while maintaining consistency</li>
                <li>Automated application system that maintained careful timing and distribution patterns</li>
                <li>Real-time analytics dashboard tracking regional callback rates by demographic variables</li>
            </ul>

            <h3>Data Management</h3>
            <ul>
                <li>Centralized database of scraped work experiences with demographic tagging</li>
                <li>Systematic tracking of applications and responses</li>
                <li>Google Sheets integration for collaborative research analysis</li>
                <li>Automated bi-daily dashboard updates for research team monitoring</li>
            </ul>
        `,
            challenges: [
                'Collecting and processing over 3 million work experiences while maintaining data quality',
                'Designing controlled resume variations that isolated specific variables',
                'Implementing efficient storage and retrieval for large-scale application tracking',
                'Creating reproducible application patterns across different regions',
                'Maintaining consistent data quality across multiple variables'
            ],
            solutions: [
                'Developed robust scraping pipeline with comprehensive error handling and validation',
                'Created systematic resume generation framework with controlled parameter variation',
                'Implemented Google Sheets integration for efficient data management and collaboration',
                'Built automated tracking system for application outcomes',
                'Designed comprehensive dashboard for real-time analysis visualization'
            ],
            impact: [
                'Successfully processed and analyzed over 3 million work experiences',
                'Reduced manual data processing work by over 80%',
                'Generated statistically significant research data across multiple French regions',
                'Enabled quantitative analysis of regional employment patterns',
                'Contributed to academic research on labor market dynamics'
            ],
            // Technologies grouped by category; each group is a label plus a list of names.
            technologies: [
                {
                    category: 'Data Collection',
                    items: ['Python', 'Selenium', 'BeautifulSoup', 'Scrapy']
                },
                {
                    category: 'Data Processing',
                    items: ['Pandas', 'NumPy', 'Data Processing Pipelines']
                },
                {
                    category: 'Storage & Integration',
                    items: ['Google Sheets API', 'SQL', 'RESTful APIs']
                },
                {
                    category: 'Analysis & Visualization',
                    items: ['matplotlib', 'seaborn', 'plotly']
                }
            ],
            // One paper per studied variable: sex, then name origin.
            links: [
                {
                    type: 'article',
                    url: 'https://dares.travail-emploi.gouv.fr/publication/discrimination-lembauche-selon-le-sexe-les-enseignements-dun-testing-de-grande-ampleur',
                    label: 'Research Paper (Discrimination on the basis of Sex)'
                },
                {
                    type: 'article',
                    url: 'https://shs.hal.science/halshs-03693346/document',
                    label: 'Research Paper (Discrimination on the basis of Name Origin)'
                }
            ],
            // Start and end are month-and-year strings, not date objects.
            timeline: {
                start: 'February 2019',
                end: 'June 2020'
            }
        }
    },
];
