12:["$","$L13",null,{"locale":"en","now":"$D2026-07-21T08:14:10.992Z","timeZone":"UTC","messages":{"Tmy-page":"My Page","Tlogout":"Logout","Tcontact-us":"Contact Us","Tlogin":"Login","Tcredit-label":"Credit","Tdiagnosis-btn":"Diagnosis","Tupgrade-plan-btn":"Upgrade Plan","Tgoogle-continue":"Continue with Google account","Tgoogle-connecting":"Connecting…","Tgoogle-or":"or","Tgoogle-consent-pre":"By signing up or logging in, you agree to our ","Tgoogle-consent-terms":"Terms of Service","Tgoogle-consent-sep1":", ","Tgoogle-consent-collect":"Collection and Use of Personal Information","Tgoogle-consent-mid":", and ","Tgoogle-consent-privacy":"Privacy Policy","Tgoogle-consent-post":".","Tgoogle-err-cancelled":"Login was cancelled. Please try again.","Tgoogle-err-failed":"Google authentication failed. Please try again later.","Tgoogle-err-network":"Your network connection is unstable. Please check your connection.","Tgoogle-err-popup-blocked":"Your browser blocked the popup. Please allow popups and try again.","Tgoogle-link-title":"Link your existing account","Tgoogle-link-desc":"An account already exists with this email. 
Enter your password to link it with your Google account.","Tgoogle-link-password-placeholder":"Enter your existing password.","Tgoogle-link-submit":"Link and log in","Tgoogle-link-failed":"Incorrect password or linking failed.","Tgoogle-welcome-title":"Welcome!","Tgoogle-welcome-desc-1":"Your Data Clinic sign-up is complete.","Tgoogle-welcome-desc-2":"Start your data diagnosis right now.","Tgoogle-welcome-start":"Get started","Tgoogle-welcome-later":"Set up later","T1dd5d8c1":"The report is not yet available.","T1dd5d8ef":"Final report","T1dd5d8ef-generate-diagnostic-report":"Generate Diagnostic Report","T1dd5d8f0":"Action plan","T1dd5d8f0-improvement-suggestions":"Improvement Suggestions","T1dd5d8f4":"Available","T1dd5d8f5":"Plan upgrade required","T1dd5d8f6":"View Details","T1dd5d8f8":"Diagnosis Inquiry","T1dd5d8f9":"Apply for Diagnostic Consultation","T1dd5d8fa":"Request Consultation","T1dd5d8fd":"Your Diagnostic Steps","T1dd5d900":"Diagnostic Credits","T1dd5d901":"Plan upgrade required","T1dd5d904":"To request a diagnosis","T1dd5d907":"Monthly Plan","T1dd5d908":"Annual Plan","T7c1b0b76":"Recommended","T1dd5d906":"Diagnosis","T1dd5d909":"File uploaded successfully!","T1dd5d910":"The file exceeds 1TB and cannot be uploaded.","T1dd5d911":"The ZIP file must contain both 'test/' and 'train/' folders at the top level.","T1dd5d912":"Unsupported file type(s) detected.","T1dd5d913":"Re-upload File","T1dd5d915":"Data upload completed!","T1dd5d916":"Number of Images for Diagnosis","T1dd5d918":"Upload File","T1dd5d918a":"Upload the .zip","T1dd5d924-credits-required-for-this-run":"Credits Required for This Run","T1dd5d927-balance-after-run":"Balance After Run ","T1dd5d931":"Re-upload Data","T1dd5d933-continue":"Continue","T1dd5d935":"Supported image formats: jpg, png, jpeg","T1dd5d936":"Download the sample","T1dd5d937":"Get the template dataset to see the required folder structure.","T1dd5d939":"Class-wise images in the test folder","T1dd5d939ac":"Prepare a /test 
folder","T1dd5d940":"Create sub-folders for each class and place one sample image in each.","T1dd5d941":"The /test folder is not included in the diagnosis; it’s used only for quality-enhancement checks.","T1dd5d943a":"Create matching class sub-folders and add one sample image per class.","T1dd5d942":"Class-wise images in the train folder","T1dd5d942a":"Prepare a /train folder","T1dd5d944":"Zip everything","T1dd5d945":"Compress both /test and /train into a single .zip file.","T1dd5d947":"Upload your archive (max 1 TB). We’ll take it from there!","T1dd5d948":"Data Clinic Dataset Format Preview","T1dd5d939a":"Example","T1dd5d942af":"Edit completed","T1dd5d946a":"Re-upload completed","Tmktpop001":"Get the Data Quality Management Guidebook

and a Free Diagnosis Coupon!","Tmktpop002":"Data quality management report","Tmktpop003":"Agree to receive marketing now and we'll send you a quality diagnosis coupon for 10,000 images

and our data quality management guidebook right away!","Tmktpop004":"Agree to receive","Tmktpop005":"Don't show again today","Tmktcst001":"Marketing communications opt-in","Tmktcst002":"Receive useful updates like events and newsletters.","Tmktcst003":"Marketing communications opt-in terms","Tmktcst004":"Open marketing communications opt-in terms in a new window","Td66f9344":"Pre-learned","Tix0ouki6":"Error resending email:","Tq0a5isgp":"Please confirm your current password.","Trccmm36z":"Sign in is required.","Tf52b1a1d":"of","Te9004be7":"Email verification has not been completed.","T2788a747":"Send email verification","T2c7ddb73":"Please complete email verification!","T34310a68":"","T10795540":"Please check the email you sent","T3ce81313":"check","T6bd7a1f2":"After email verification, instructions on using paid membership are provided.","Teda30773":"It will be conducted by email.","T02cbe4a7":"Contact Customer Service","T6eb5cd03":"First name","Tl04crs58":"Name is a required field.","Taf9b1ee2":"email","Tsyfj3p54":"Email is a required field.","Tlmgpt2fg":"Please enter your username in email format.","Te98a0ad6":"Please enter the name you entered when signing up.","Tf29b8bf9":"Please enter the email address you entered when signing up.","Tb1071cc4":"Please try again!","T1dd5d8a4":"Matching","Ta3dc6d12":"There is no member information.","Taffee4a4":"If you forgot your password","Te2d8c220":"You can reset it.","Tibdk735w":"Enter a name.","Tyep7l8gp":"Enter your email.","Ta731b816":"Issue a temporary password","T2b9xluw6":"Please enter your password.","To9i02qy9":"Minimum password length is 9 characters.","Tnwcpzgaz":"Maximum password length is 15 characters.","Tcd504c7c":"password","Td1ogyozm":"Password must contain 9-15 characters with a combination of letters, numbers, and special characters.","Tbb4dac5b":"Email or password","Te2ff501a":"You entered it incorrectly.","T8c0deee2":"Please log in.","T2228c594":"log in","T2329b638":"Let us guide you through a wonderful 
data exploration!","Thf4bsh51":"Enter your password","Tq1b9tjl6":"Keep me signed in","Tgou400sb":"Remember email","T7f16ec25":"join the membership","T48b5a76d":"find password","T383cb90f":"Free to use!","T2c53799b":"We provide quality assessment reports for selected public datasets, allowing you to experience the basic functions of Data Clinic.","T2c53799b-experience-data-clinics-core-features-no-cost-no-credits-required-using-curated-public-datasets":"Experience Data Clinic’s core features—no cost, no credits required—using curated public datasets.","Tm4zzu21l":"10,000 won/m","T54619f82":"We provide diagnostic reports for all publicly available datasets selected by Pebblous. The list of data is continuously updated.","T5d1819a0":"Up to 200,000 diagnostic credits provided","Tdvxgc9rr":"5 million won/m","T3c9f5771":"Handle mass-scale diagnostics and advanced quality-improvement services in one package.","Tc88008bf":"Up to 2 million diagnostic credits provided","T5d137189":"Please enter.","T0d6f577b":"Password","T58513a30":"verify password","Td321c0e2":"Last name","T8f3k9p2m":"This field is required.","Tx7h4n9v5":"You must agree to the Terms of Service.","Tm2c5r8k4":"You must agree to the collection and use of personal information.","Tw9b6d3j7":"You must agree to the Privacy Policy.","Tq5y8m1l4":"Email verification is required.","Th4k7p9s2":"Password must be at least 8 characters long.","Tn3f6v9x5":"Passwords do not match.","Tb8l2c5h7":"Please enter password confirmation.","Tw6s9h2n5":"First name is required.","Ty1f4b7m3":"Last name is required.","Tk3m9p5x8":"This email address has already been registered. 
Please log in with that email address.","Tw2h6n9c4":"Agree to all","Tr7b4k8m2":"(Required) I agree to the Terms of Service","Tl5f9s3v7":"Terms of Service","Td1h4n7x9":"(Required) I agree to the collection and use of personal information","Tp8c2m6b4":"Personal Information Collection and Use Agreement","Tq4k7v1s9":"(Required) I agree to the Privacy Policy","Ty6n9h3f5":"Privacy Policy","Tm2x5b8l4":"(Optional) I would like to receive Data Clinic notification emails","Tj7p4s1c6":"Marketing Communications Agreement","Th3v6m9n2":"Verification completed","T4ec021aa":"For use of Data Clinic","Tfc086053":"You must agree to the Terms of Use.","T6fef9a4f":"See more details","Tfb19be44":"Email","Tbe433920":"Continue","T891129a1":"Name","T8bdecb25":"Please select a rate plan.","T8c27c4bb":"Final step!","T81569912":"Rate Inquiry","T80da1796":"Inquiry for using Data Clinic","T8d393906":"Collaboration proposal with Pebblous","Tacf0c290":"Request public dataset diagnostics","Tdd9ed209":"Text, multimodal, structured data diagnostic inquiry","T683dc437":"Contact us for quality improvement (data bulking/diet)","T5fb8820b":"Apply for diagnosis Inquiry about data upload error","Te8806939":"Other inquiries","Tu1ini81m":"Inquiry type is required.","T930xef8v":"Description can be up to 10,000 characters.","Ta8717ab2":"Consultation request completed!","T256ac67a":"About Pebblous Data Clinic","T1ab878a1":"Ask anything!","T0d050bb2":"We will respond within one day (excluding holidays).","T1495884c":"Through","T35972b61":"We will respond within one day (excluding holidays).","Tmmdznnl2":"Please enter the email where you would like to receive the response.","T7yezfk22":"Please select an inquiry type.","Te7f6e0de":"Send Inquiry","Tzs8g4n98":"Please provide detailed information about your inquiry.","T8799800a":"Go to log in","Tbf60842a":"Credit recharge application completed!","Tc4a2c494":"URL copied successfully!","T2dad0a16":"Copy failed!","T735c4a34":" items","T2dad0a18":"Copy 
failed","T27f8d2f1":"Share","T221aff4d":"Data Basic Information","T2377c4cc":"Information that Pebblous has added to the basic information of the data source, such as subject/field, etc.","T023caec2":"Data Subject/Field","T9a98dec1":"Number of data","Taf8a79f5":"Data capacity","T900c4aa7":"Data Type","T87047581":"Areas of application","T11724c5f":"Labeling Presence","Tda1ec7ce":"Basic information for diagnosis","T7fb586ff":"Information about diagnostically valid data obtained through basic analysis of the dataset.","Tbe6fe560":"Data information after preprocessing","T82c0241f":"This is the task that the AI wants to learn from the dataset. The preprocessing structure of the data varies depending on the intended task.","T7f39d522":"Preprocessing structure","T0ffe8ddd":"Diagnostic target data information","Tbbbebd28":"Indicates whether class-specific diagnostic results are generated.","T64648a75":"Class Unit Diagnosis","T7d4522e3":"Number of diagnostic data","Tb774b37a":"Image color channels","T98087f7f":"Channel information of the original images within the dataset.","T5847047f":"Original channel information","Ta6067f49":"Diagnostic conversion channel","Tcf64f990":"Want to see more detailed diagnostic results?","Tebf425b9":"Go to original link","T148b1b08":"Completed membership withdrawal","T1db64059":"We regret that you have decided to withdraw from the Data Clinic service.","Tebfea42d":"If you leave your valuable opinion,","T2077fa17":"We will definitely take note of this to provide better service.","Tac1aee38":"Please indicate the reason for leaving the service. (Multiple selections possible)","Tcbf32a0c":"Please provide a detailed reason for your withdrawal.","Tb33eb638":"To unsubscribe","Td9de218e":"cancellation","Td460af88":"Data Clinic paid membership","T53d71ec1":"I'm sorry you had to cancel.","T98e686ec":"Please indicate the reason for service termination. 
(Multiple selections possible)","Tc101963c":"Please provide a detailed reason for canceling your plan.","T812ce8d9":"Apply for cancellation consultation","T56745e8f":"Promo code applied!","T55810ef5":"contact","Tac862295":"Edit my information","T1f1e84fb":"Edit completed","T47a3c9b8":"Complete my information modification!","T29b4eeb0":"Promotion Code","T93c8d6e5":"Please enter a valid promotional code","Teeeb805b":"Change your password","T99c4150a":"Company/Affiliation","Tc7b3f3b4":"Application complete!","Te8e0c911":"apply","T1330284z":"The credit will be applied immediately and the plan with the promo code will start after your","T4vmgrf2a":"Existing","T0y1426pu":"plan","T2ff78c09":"Unsubscribe from Data Clinic Service","T44d7s97w":"ends","T9c9bb19f":"Current password must be at least 8 characters long.","T62a269f7":"Current password","T4ec021ab":"Please enter your current password.","T4ec021ac":"Current password must be at least 9 characters long.","T4ec021ad":"Current password cannot exceed 15 characters.","T4ec021ae":"New password must be at least 8 characters long.","T4ec021af":"Please enter your new password.","T4ec021ag":"New password must be at least 9 characters long.","T4ec021ah":"New password cannot exceed 15 characters.","T4ec021ai":"Password confirmation does not match.","Tddcc9362":"New Password","T4ec021aj":"Please confirm your new password.","T4ec021ak":"Password confirmation must be at least 9 characters long.","T4ec021al":"Password confirmation cannot exceed 15 characters.","T4ec021am":"Password change completed!","T4ec021an":"Enter your current password","T4ec021ao":"Enter your new password","T5360f0d4":"Confirm new password","Tb101c881":"Change Password","T0dd8df82":"Change Completed","T56f45361":"Current password mismatch","Tcf116ec2":"A temporary password has been sent to you.","T09209044":"After logging in, enter your desired password.","T66f73223":"Please reset it","T3639e73b":"Sample","T69fc44c5":"Sample Diagnostic Report 
PDF","T501bd268":"View a sample Data Clinic Diagnostic Report","Taf473a15":"Diagnostic Report Web","T465b9304":"Go see","Tcc08ccf5":"Diagnostic Report PDF","T612ca27b":"Download","T9faca334":"Before downloading","T01e48f30":"Please complete the login.","T8da5b3cb":"","Tbde6fc58":"In use","Te8df8def":"Data Clinic","T7c1b0b75":"What Is Data Clinic?","Ta047fead":"Data Clinic Technology","T7ab69980":"Rate Plan","Te8a2bb75":"Dataset","T6e142bed":"Diagnostic Report","T051d3a98":"Diagnostic Report Utilization Guide","Tc1de0654":"Diagnostic Report Sample","Tddc4a6aa":"Improvement Cases","Tb467ee94":"Synthetic data","Tcd1fb290":"Pebblous","Tblog0001":"Blog","T2b0be56a":"Please diagnose me!","T2b0be56b":"Request Diagnosis","Tbde6fc58-space":"","T974ac9e1":"Email authentication","Tec0c4cd8":"Done!","T7577cd5b":"Welcome to Data Clinic membership.","T722b44c9":"Log in","Td90c5b1e":"Enter new password","T7a58e8d9":"Change","T7ee0984a":"We provide quality assessment reports for selected public datasets. This allows you to experience the basic functions of Data Clinic. (OOO type)","T73a4b6e9":"100,0000 won per month ~","Taed54053":"Pebblous provides diagnostic reports for all public datasets selected by Pebblous. The list of data is continuously updated. (OOO species)","T54b9c7d8":"410,0000 won per month ~","Te8f2a1c9":"4,160,0000 won per month ~","T18c7a439":"You need to enter the correct email format.","T95d1c8b7":"Password is required.","T34a7e9d6":"Confirmation of password is required.","T904e9c5d":"Duplicate check","T904e9c5e":"Please enter your contact information in the format 010-XXX-XXXX.","T904e9c5f":"Please enter your company/affiliation.","Togioxjyg":"Data Clinic\nImprovement Case Study","Tadb88c39":"Introducing data improvement cases provided by Data Clinic. Solve the data quality issues you are struggling with with three data improvement solutions consisting of bulking, dieting, and replica.","Thxuokzjx":"Introducing data improvement cases provided by Data Clinic. 
Solve the data quality issues you are struggling with with three data improvement solutions consisting of bulking, dieting, and replica.","T4f1a014e":"Contact Us","T7295f2c5":"Data distribution in embedding space","Te28e75b8":"Capturing boundary points between different clusters","T88c7bdf0":"Targeting","Te6818c55":"Boundary point targeting technique","T70140f5a":"Density targeting technique","Tbace0573":"Bulk up","T57bef19f":"Bulk up at the center of the boundary point","Ta409db63":"Bulk up around low density points","Tde017513":"Original Birds-450 dataset","T090d8755":"Precision targeting synthetic data","T9431136a":"Improving AI model performance through precision targeting synthetic data","T5bb2a795":"Bulk up\nprecision targeting data","T82c4d6a9":"Original data","T95a3b7c6":"Low density teething bulk up","T37d2e8f1":"Boundary Teething Bulk Up","T30950206":"Pebblous uses its proprietary precision targeting technique to find areas where data is lacking and add the optimal volume of synthetic data to improve AI performance.","T42bea23d":"Case study on improving classification performance through data bulking","Tee6bb1de":"This is an example of diagnosing an actual dataset and adding about 5% of synthetic data as a precision targeting method, which resulted in a performance improvement of about 2% without any changes to the classification model or learning process.","Tcd3645cf":"Solve various quality issues discovered through data diagnosis with improvement services!","T90c31074":"Apply for data improvement service","Td3549a00":"Inhomogeneous datasets, including data duplication","T405263e8":"A homogeneous dataset after a data diet","T63a4b8c7":"Light weight through data diet","T82d9e5f4":"Random","T1a17c9f3":"Data Diet\nSolution Concept","Tc2db4d26":"Application of data diet to classification tasks","T35f85c5d":"This is an example of applying data diet to various classification datasets. 
Data can be lightened to the extent that classification performance does not deteriorate even when data is reduced by as little as 40% and as much as 80%.","T61a82eb8":"Data Diet Curve (Data Diet Curve)","T707558683":"Synthetic data for privacy protection","T135114123":"By creating virtual data that removed sensitive information, protecting the original data, the statistical characteristics of the data are taken as it is, and legitimate and ethical data can be used.","T797243922":"Reduction of privacy management costs","T139998685":"It combines artificial intelligence technology and traditional statistical techniques to accurately, safely and quickly generate data replica to reduce data management costs.","T412820628":"Promoting data utilization","T830025690":"The replica is designed to adapt to the target task or analysis goal of the AI model to flexibly respond to the needs of various fields to enhance the usability of the data.","T421997025":"Financial field","T693734027":"Maintain statistical characteristics of financial data and protect personal information. You can safely use card trading or insurance billing data.","T989862757":"Medical field","T677863258":"Medical records and clinical data are synthesized and used for research and analysis. 
Patient information protection and accuracy of research can be achieved at the same time.","T936406080":"Statement and statistics","T272307518":"It has statistical characteristics similar to real data and provides synthetic data optimized for research and policy development.","T0f8761ac":"The Effect of Data Replication","T4aebee91":"Areas of application for data replica","T97eb2cb3":"Learn more about\nhow to improve your data","T393212944":"Value","T2ceb9935":"Empty data area","T0a7e3b48":"Duplicate data area","Tc5265058":"Actual data","T7dc39eea":"synthetic data","T23d69ece":"Data Bulk-Up","T8o6lwxt1":"Find where data is lacking and generate precision targeting synthetic data suitable for the dataset.","Tfba60b1a":"Data Diet","Th1cddyw8":"Eliminate duplicate/similar data in the dataset and make it lighter while ensuring the performance of the AI model","T9545fd0f":"Data Replica","T26720349":"To protect the original and promote safe distribution of data, virtual data with similar statistical distribution characteristics are created.","T273b8298":"Three Improvement Solutions for Data Clinic","T36a34e0b":"Pebblouss is","T50dd7692":"Green Data","T223b0ee1":" and","T9a81a2d6":"Data Greenhouse","Tf1c86146":"Contributing to sustainable artificial intelligence.","T78330257":"Data Clinic's","Tc23cef14":"Differentiated technology","Tc3b01bfb":"Through DataLens, which is composed of the latest deep learning neural networks","Tc4f24342":"The multidimensional characteristics of artificial intelligence learning data","Te33115c7":"You can observe it precisely.","T9b15a3cd":"AI learning data / big data","T4db76066":"* 'DataLens' US trademark application","T3104ed66":"General type","T914dd92c":"Quickly deployable","T4ebcea66":"Using ready-made deep learning neural networks","Tcb253d7e":"Using deep learning neural networks","T45b2fdbd":"Customized","T46dad849":"Optimized for customer datasets","Tf783b824":"Data Geometry/Statistics Analysis","T1dc5fc27":"Complex relationships 
between data and","T7b1af155":"Clearly revealing the pattern","T3492ed1a":"Based on the latest algorithm","T9afea8c9":"It is a geometric/statistical analysis technique.","T3dad15b3":"Data visualization","Tfe7be4f9":"Complex data is intuitive and","T0e943971":"In an easy-to-understand visual format","Ta9f2750f":"By converting, user understanding and","T0383d0dc":"Strengthens decision making.","T9a27bc2e":"Improving data quality","T1bb90f4b":"Customized based on unique analytics","T85d8126e":"Improvements address the flaws in your data and provide data of optimal quality.","T688c39c7":"Securing intellectual property rights for core technologies","T51821199":"patent","T2d27964d":"Domestic applications/registrations: 35/7 cases","Tf44a95b3":"US applications/registrations: 5/2","T47119a7d":"PCT applications: 5","T75c62c45":"thesis","T4a2dc979":"Key features of Data Clinic","Tbb2fee98":"Supports various data formats (Modalities)","T99c165ae":"Multimodal. Quality diagnosis and improvement are possible regardless of data format.","T820458977":"Text, image, time series, orthopedic DB, formula, formula,","Tb6e00208":"In various industries and work domains","T1c71d0c1":"It is applicable","T42e5e9e0":"Data Clinic is being applied in various industries such as manufacturing logistics, pharmaceuticals/medical, fashion, and finance.","T366786419":"Mobility, Defense, Sports, Meta Bus, Resource recycling,","T7ee79522":"Efficient through precision targeting","Ta20b5f3b":"Generate synthetic data","T2cd30b68":"Synthetic data, again, is about quality rather than quantity. Pebblous accurately identifies where data is lacking based on data clinic diagnosis and adds just the necessary amount of synthetic data.","T6aa9c814":"Structured data in DB is also a data clinic","Te5ab9be9":"It is applicable","T6935d013":"Important information such as financial and medical information is stored in a structured form in the DB. 
Data Clinic provides diagnosis as well as reproduction data and combination evaluation for structured data.","T3c6eee2d":"AI learning data","Td85b0c97":"Manage efficiently","T7e79c61e":"Effective data lightening is possible through the data diet of the data clinic. It optimizes the volume of AI learning data by removing unnecessary data and maximizes resource efficiency.","Ta9bd9e4c":"Data quality is explainable","T9c08c19e":"This is the first step to AI.","T2126d746":"Ethical and fair use of datasets and compliance with data governance are the beginning of explainable AI. Data Clinic supports a variety of data quality standards.","T5cf49c07":"The quality of the data and","T3d5c3049":"Market problems caused by it","T638fd810":"Ambiguous data pricing system, poor model performance, waste of GPU resources, and inadequate response to increasing AI regulations","T40c0c2a9":"Waste of GPU resources","Tfa17eead":"AI Regulations","Taf027324":"Continuous and diverse","Tad962d43":"Data quality issues","Td509108c":"The syntax, meaning, and distribution of data","T5053a5d9":"Complex quality issues","T42030f3b":"Chronic lack of data and incorrect","Tcae92998":"Data collection direction","T01ce7e1d":"AI performance degradation","T8ccb0fac":"and","Tce83a032":"Due to low data quality","Tbb467a5a":"GPU/energy waste due to model-centric iterative learning","T4b84639e":"80% of GPU time and cost wasted due to duplicate data","Tce007207":"Case Link","T16bdd75a":"Data Center Energy Issues Consider Electricity Ration System (US)","T4f0764db":"Strengthening","T4a958568":"Growing need for governance","T9ca0a9c9":"The growing need for AI industry regulation and data governance","T11f0315f":"Increasing demand for AI-Ready Data","Te094c46a":"Gartner Link","T2ad31eec":"The first artificial intelligence regulation bill, the EU AI Act 2024, will be published in August","T9de7c229":"A fine of 7% of total sales or $35 million may be imposed.","Te439d30b":"About customer data","T5a02c1ef":"Quality 
Assessment and Diagnostic Report","T4be2cbf5":"example:","Td72f1efd":"Kaggle Bird-450 dataset","T63ae15d2":"Comprehensive Evaluation","Tdbe61464":"From a comprehensive perspective, we synthesize the results of Level I, II, and III diagnostics to evaluate data quality and suggest directions for improvement.","T0b0cffa2":"Summary of diagnostic results","T001d4ed2":"Diagnostic Report Issue Date: September 8, 2024","Tda0c83f1":"The Bird-450 dataset is of good quality overall, but there are some areas where improvements are definitely needed. The data is well-corrected and the total number of images is sufficient, but the number of classes is large, and the images for each class are not sufficient. In addition, some classes have too much diversity.","T64f12d3c":"Quality Improvement Suggestions","Tb0984b12":"If the boundaries between classes are not clear, you can add synthetic data to increase the discrimination between classes. (This applies to both the training and test datasets.) In the case of this dataset, since the number of data per class is relatively small compared to the number of classes, I suggest adding about 10% of the total synthetic data.","T6a49904a":"Contact us for data improvement advice","T4a920d21-bulk-up-data":"Bulk up Data","Td443b088":"Data replica","T25dc5655":"Diagnostic status is always available","T4d8442bc":"Once the diagnosis is complete, an email will be sent.","Tdb3a195e":"Pebblous solution","T3a38aa0f":"Satisfying AI suitability","Te23378c4":"Green Data","Td8152c11":"* Provided","T296ec839":"Providing high-efficiency, eco-friendly data that meets AI suitability criteria through data quality assessment and optimization","Td77de712":"Solution","T6ef99901":"A data management solution for next-generation governance that continuously evaluates and improves the AI suitability of data.","T9ea36a03":"Green data generation through semantic-based data operations","T8f299a07":"Automatic quality assessment based on a comprehensive survey of large 
volumes of data","Tbc10718b":"Generate precision targeting synthetic data","T494418ff":"Data lightweighting to remove duplication and bias","Tc94d9656":"Secure data distribution through privacy protection","T8916f5bc":"Data generation through semantic exploration/inference/combination","Td6d6b443":"Effective data management system based on meaning","T211f13df":"AI Industry Regulatory Response","Tdf555b71":"Flexible work response through data lifecycle management","T17faacfb":"Verifying the safety and ethics of data for sustainable AI","T5d13c0b5":"*Green data","T7c43e829":"What is it?","T94d50c68":"High-efficiency, eco-friendly data that meets AI suitability criteria through data quality assessment and optimization","T2fc29cf5":"*Data Greenhouse","Tf70a7d3d":"Data governance solution that continuously manages green data","T0e52d1e8":"Why Data Clinic?","T2067d009":"Based on the full bloom dataset","T74114d0b":"Time-fast quality assessment","Tc4e1e862":"Quality assessment and visualization of AI training datasets","Te9c40f68":"With synthetic data","Tbc750e10":"Performance Improvement","T0890f8aa":"Generating synthetic data for precision targeting based on quality diagnosis","T20500951":"By making data lightweight","T9a4568f4":"Increase efficiency","Tf77b169f":"Optimize learning efficiency by reducing redundant data","Tbb9d39ff":"Want to learn more about diagnosing and improving data quality?","T3d661176":"diagnosis","T28e99e39":"Step by step for Level I, II, III","Tac0db9eb":"Provides diagnosis.","T5c97fde4":"We provide systematic diagnosis, from Level I, which is a basic diagnosis based on EDA, to Level II, which fully utilizes artificial intelligence, to Level III, which is the highest-level diagnosis of the data clinic that considers data characteristics, target tasks, and even future synthetic data generation.","Tdd76ae9c":": Basic Diagnosis","T855b0047":"Level I diagnostics assess data integrity, missing values, class balance, and Statistical 
Measurements.","T431c6117":"This is a basic analysis step, which prepares you for the next step.","Td2048994":": General lens-based diagnosis","T72433313":"Level II diagnostics is the step of analyzing data using DataLens, a neural network-based data lens. In this process, geometric and distributional properties are observed to identify relationships and statistical distributions between data.","Ta3bfb459":": Data-specific lens-based diagnostics","T49113e24":"Level III diagnosis is the analysis step using DataLens, a custom neural network tailored to the domain of the data. This lens consists of a measurement lens and a generation lens, so it can also be used to generate synthetic data. The measurement method is the same as Level II.","Td0f61e2e":"Add, reduce and protect your data","T37c5ce26":"Provides ways to improve.","Tde1eedcd":"Based on the data diagnosis results, data is added through bulking in areas where data is insufficient, and data is reduced through diet in areas where there is too much data. In cases where safe data distribution is required, data protection such as privacy is strengthened by maintaining statistical significance.","T6f0d693f":"Find where data is lacking and generate precision targeting synthetic data suitable for the dataset.","Te4cfbb2d":"Eliminate duplicate/similar data in the dataset and make it lighter while ensuring the performance of the AI model","Tb8f37063":"To protect the original content, create virtual data with similar statistical distribution characteristics.","Tab357981":"Measuring the distance between data","Tb45bd652":"DataLens allows you to measure the distance between two data, which can systematically explain the similarities and differences between the two data. The distance measurement between two data can be extended to multiple data, which can be used to calculate the overall structure of the data. 
This method is effective in checking for the presence of outliers or duplicate data.","T07a15322":"The 10 data closest to a specific data","Tf9f9624d":"The 10 data points that are furthest from a specific data point","T50b20afe":"Origin distance distribution of multidimensional feature vectors: magnitude of the vector","Ttw3shf2t":"Average of all data","Tn7ih2cpj":"Distribution by class","T19b0f721":"Measuring the internal density of data","T20af8b05":"Data density measurement quantifies the density of data points in the embedding space. It is a key technique for interpreting the structure and complex patterns of data and inferring meaningful relationships. In the data quality diagnosis report, density measurement result charts are used to identify over- or under-density areas and distinguish between data in important areas and potential outliers. This density measurement is effective in identifying data similarity, clustering, and unique patterns.","T1eff7963":"Spatial distribution of geometric properties: Projecting the density computed in the intrinsic dimension to two dimensions","T0b243b59":"Statistical distribution of geometric properties: Class-wise density distribution","T04d540f1":"Data Clinic, a comprehensive hospital for data,\nprovides all-in-one services from data quality diagnosis to quality improvement.","T836f8e54":"Web and print versions of the Data Clinic","T187313eb":"What is Pebblous DataLens?","Te794ee5b":"Data Lens and Diagnostics","Tc12e657f":"Data Lens and Imaging","T238cea4d":"Data Lens and Diagnostic Level","T8021beb8":"Analyze and diagnose AI data quality with Pebblous DataLens.","T1a1672ad":"Pebblous DataLens uses artificial neural networks to express customers' AI learning data as feature vectors in the embedding space. 
Pebblous own indicators are used to measure the quality of data using various scientific indicators for the data imaging results that have been converted to enable observation and measurement.","T24f489df":"Data Imaging","T2f70d2b6":"Task-neutral quality assessment","T8b0ef8d7":"A process of diagnosing statistical and physical characteristics of the dataset itself, regardless of the dataset's domain or intended use.","Tae910c01":"Perform advanced quantitative and qualitative diagnostics on the data distribution in the embedding space, along with basic quality diagnostic indicators such as consistency, integrity, and completeness.","Tccd8151e":"Task-specific quality assessment","T0288ad7b":"A customized diagnostic process designed to fit the AI model and task the customer wants to configure, taking into account the domain and purpose of use of the dataset.","Tde8f5878":"Perform advanced diagnosis by simultaneously considering the data distribution of the embedding space and the characteristics of the target task, setting diagnostic goals such as class discrimination, data density, and outlier detection.","Tae6688d3":"Data Clinic's diagnostic system is designed to effectively diagnose and manage customer data through a framework that evaluates and improves the quality and structure of data by diagnostic level.","Tf930a864":"Data Imaging is a patented core technology of Pebblous, a technology for visualizing and interpreting complex multidimensional datasets. It transforms large datasets into purpose-built embedding spaces, allowing neural networks to understand the structural characteristics of the data. 
It provides deep insights into patterns and relationships in the data, helping to make informed decisions and predictive analytics.","T0704b249":"Intrinsic dimensions of data","Tb790ddfe":"In the process of extracting feature vectors by passing the entire dataset through the DataLens, the minimum dimension of the feature vectors optimized for the input dataset is calculated, and this minimum dimension is defined as the observation dimension or intrinsic dimension. The observation dimension is the minimum dimension of the feature vectors output from the general lens, and the intrinsic dimension is the minimum dimension of the feature vectors output from the custom lens.","Tf2976bdf":"Standard Lenses vs Custom Lenses","T7f92d5bd":"Level II Diagnosis (Standard Lens)","T6ce71195":"Level III Diagnosis (Custom Lenses)","T52144801":"Data Lens","T5720cd95":"We build a ‘data lens’ using ready-made deep learning neural networks that can be quickly introduced.","Ta5f55d65":"Optimized for your tasks and domains","T18c75fb5":"Customized artificial neural network","Tb643a96a":"We use this to construct a ‘data lens’.","T3b76c809":"Diagnostic scope","Tee94a280":"Focuses on assessing the quality and structure of data by measuring geometric and distributional properties of the data, identifying areas of information that need improvement.","Td14623cc":"In addition to Level 2 diagnostics, advanced indicators that enable in-depth diagnostics provide insight into complex issues in your data.","T854cea85":"Improvement Solutions","T0ea797b4":".","Tec15af0a":"Problem identification","T71e7377f":"Distributional analysis in a universal embedding space can help identify issues such as bias in the training dataset.","T920b7f20":"Data-specific embedding space","T8ef3bd59":"You can find problems in your learning dataset more precisely and find improvement measures such as precision targeting synthetic data.","T00580a41":"Want to see a more detailed chart?","Tyyocloz2":"Overall 
distribution","Tj9ei0l5z":"Density charts","T8oej3ypd":"Density by class","Tlkpkknyp":"Distance-Density Chart (Feather Chart)","Tc5f04143":"High-dimensional data embedding\nvectors are projected\ninto two dimensions\nfor visualization.\nThis allows you to gauge\nthe overall shape of the data,\nsuch as the presence or\nabsence of clusters.","T53ea46ab":"It displays the data density measured in high-dimensional data embedding by projecting it onto a two-dimensional space. One cell is one data point.","Tb7eb6285":"Diagnostic level 2","Te72c09e5":"General lens based","T4666c253":"Diagnostic level 3","T8c6febab":"The density distribution of representative classes is displayed as a box chart. The classes with the lowest and highest average densities are displayed on the far left and right, respectively.","Tbad2b956":"It displays the size and density of high-dimensional embedding vectors at once, so that you can understand the approximate spatial distribution and density together. Data with good distribution usually have a single feather shape.","T62a7d710":"From data quality diagnosis to improvement","T3e9229f9":"Based on data treatment based on accurate quality diagnosis of data clinic, safety, efficiency, and reliability are guaranteed.","T96e98e5e":"Get your AI-Ready Data ready!","Tcc6fc601":"I'll explain in more detail below.","Tca968be7":"Main Process","T435b4418":"01. Data Diagnostic Engine","T575623a3":"We use advanced AI techniques to view your data from the optimal perspective and measure its characteristics.","Tfdac45a9":"(2 US patents registered for core technologies including DataLens, 28 domestic and international patents applied for)","T790dd3f7":"Unlocking the potential of your data","T1e9ea692":"Transform large amounts of data into observable and measurable data using data imaging and Data Lens technology.","Td706804b":"Data Quality Diagnosis","T8931f34a":"Precise data analysis","T9324bdc0":"Data coverage, density, homogeneity, outliers, etc. 
for imaging results","Tb108b3bf":"Generate detailed reports by measuring various indicators of data quality.","T2c52b39d":"02. Data Improvement Engine","T807124347":"Based on data diagnosis, creation of precision targeting synthetic data (data bulk up),","Te5fafb55":"Improve data quality through data optimization and weight reduction (data diet).","T2845854f":"Prescription for improvement","T02c35635":"Analysis considering domain characteristics","Tb7af3449":"We interpret the results of data diagnosis once again from customer and domain perspectives to suggest the most optimal data quality method.","Tad6a0231":"Quality improvement","T69e5ccea":"From quality improvement to reference model","T4ba0a263":"We will show you how to improve data quality and create an artificial intelligence reference model through data bulking (adding synthetic data) or data dieting (volume reduction) techniques to improve performance.","Tc14068ae":"Request for consultation on the rate plan page","T7ltkguq9":"Try it for free","T82ab7ee0":"Get advice","T38403b4e":"Data Clinic's rate plan","Td56f8810":"Introducing.","Ta4838b06":"If you need further explanation or have any questions, please feel free to contact us at any time.","Td3f89aff":"Through consultation, we will apply promotions and find the optimal rate plan that suits you!","Tz4gt87aa":"Data quality diagnosis of larger volume is possible. 
In addition, quality improvement services such as data diet and bulk-ups are also available.","Tb38e130a":"Enables quality diagnosis of larger volumes of data.","T5047b80a":"This plan also offers quality improvement services such as data diet and bulking.","T8a805483-data-diet":"Data diet","Tc882e5ad":"Customer Data","Ta18b485d":"Diagnose up to 1 million images per run","T37212354":"10,000 won/m","T3c3089d6":"100,000 won/y","T4ccff6e3-500-000-won-month":"500,000 won/m","T4ccff6e3-50-million-won-month":"50 million won","T4ccff6e3-100-000-won":"100,000 won","T4ccff6e3-per-month":"/m","T4ccff6e3-per-year":"/y","T8be469b6":"10,000 diagnostic credits","T076d11e5":"10,000 diagnostic credits based on image when subscribing monthly","T4688bc43-5-million-won-year":"5 million won/y","Te4eafeb6-10-000-diagnostic-credits":"10,000 diagnostic credits","Te4eafeb6-200-000-diagnostic-credits":"200,000 diagnostic credits","Tqcq9t5uu":"Free to use","T56f1854e":"Annual plan includes 120,000 base credits + 80,000 bonus credits = 200,000 total, enough for two 100k-image diagnostics.","T56f1854e-couple":"","Tb6046122":"5 million won/m","T98d502cb":"100,000 diagnostic credits","Teeac26a3":"100,000 diagnostic credits based on images are generated when subscribing monthly","T1549b117":"50 million won/y","T68eba01b":"2 million diagnostic credits","Tc73c27e8-annual-plan-1-2-million-0-8-million-total-2-million":"Annual plan includes 1.2 M base credits + 0.8 M bonus = 2 M total, enough for two 1-million-image diagnostics.","Tc73c27e8-couple":"","T358680a6":"Data Clinic Web","T3589ae76":"Dataset Curation","T1ae87299":"Web Diagnostic Report","T2045754e":"Advanced visualization and interaction","T2b775427":"public + customer data","Tb5441de3":"Download PDF Report","Tb48e3a5c":"Recommended Data","Tc09bc392-synthetic-data-trial":"Synthetic-Data Trial","Tac0a246e-10-synthetic-data-trial-cost-10-synthetic-data-trial-cost":"10 times/month (after 
consultation)","Tac0a246e-up-to-10-synthetic-data-trial-cost":"up to 10 runs / month (post-consultation)","T596ad90a-100-synthetic-data-trial-cost":"100 runs / month after consultation","T133d0dd4":"up to 100 / month","T5cf2f885-1000-synthetic-data-trial-cost":"up to 1,000 / month","T8dc60a6f-customer-data-diagnosis":"Customer Data Diagnosis","T463e64bb":"Image data diagnosis possible.","Ta75ad86d":"Need text, multimodal, or structured data? → Contact us for a custom quote","Tfbce9f12":"Diagnose customer data using diagnostic credits","Te2226b4b":"Diagnostic Credits: Unused credits can accumulate. Non-transferable.","T59a84bbd":"If you maintain the basic version or higher: It will not be destroyed.","Ta44c01b2":"Free version: 1 year. When upgrading to Basic version or higher, you can use it up to the maximum limit of the plan.","T5d46ab88":"(Level II Diagnosis)","T1b885e19":"(Level I, II, III Diagnosis)","T8424806c":"Printing service 3 million won/unit","T7ae01e75-download-diagnostic-results":"Download Raw Results","T28a918ff":"Special samples etc.","T6815efef":"10 million won","Tc142fca1":"Proceed with a customized project","Tc8a39e6b":"Up to 10,000 images/annotations","T4169bb4f":"generation","T7bd1c6a2":"Additional cost: 60 million won","T7e4bbf7b":"PebbloScope","T6052ded0":"Interactive data visualization and communication tool (standalone: consultation required, web: coming soon)","T125d8d32-pebbloscope-web":"PebbloScope Web","T0b372a29-snapshot-capture":"Snapshot Capture","Tskf4zpal":"PDF report 1.5 million won/unit,","T973cc666-full-pebbloscope-build":"Full PebbloScope Build","T53a1b689":"up to 100,000 images","Tdz3ju2aj":"Importance by sample, various charts,","T7925f1fb":"up to 1 million images","Te3z52z14":"Based on 100,000 images,","T02e6f24f":"Advanced visualization and interaction of public data","T02e6f24f-interactive-view-public-data":"interactive view (public data)","Tbbf1aad7":"Customer Data (Level II 
Diagnostics)","Tdf311296-levels-i-ii-iii":"Levels I, II, III","T328b274b-importance-by-sample-etc":"per-sample importance scores, charts, special cases","T55c20ef5-100000-images-10-million-won":"trim up to 100 k images (from ₩10 M)","Ted6936fd":"Additional cost for custom project creation of up to 10,000 images/annotations: 60 million won","T05f9711b":"Pebblous Recommended Dataset","T5ab4ead7":"By registration date","Tdbd42a0c":"Sort by title","T222ffc3b":"Sort by number of views","T2e100385":"With filters and search","T053e8630":"Find the dataset you want","Tc6l3nylm":"Enter keywords","T2d7cf974":"reset","Tcf336d89":"Want more settings?","Tc36eabbb":"Apply","T415b9164":"Data type","T183bc370":"1 thousand","T2ce634ec":"Between 1,000 and 1,000,000","T5c752a5f":"1 million","T283749156":"Between 1,000 to 1 million","T947612583":"1,000","T628495713":"5,000","T135722468":"10,000","T391548726":"50,000","T369258147":"100,000","T159753486":"500,000","T5b2e3a1f":"Between 1K and 1T","T474d365d":"Subject / Field","T4e70b7ab":"The page you are trying to access is not accessible.","T4e70b7ac":"This page is accessible only to authorized accounts.","T4e70b7ad":"You are not currently logged in or your account does not have access rights.","T4e70b7ae":"Please contact your administrator for more information.","T09cd85d9":"The service is being prepared.","T72a8ea70":"We are preparing to provide better service.","Tbee9ea4a":"We will prepare and come to you as soon as possible.","T333e2daa":"Back to landing page","T4e70b6ac":"Page not found!","Tee72fe00":"The page does not exist or is unavailable.","T79527816":"Please double check that the address you entered is correct :)","T02a8bac0":"Are you struggling with data quality issues?","T899f1eff":"Data Clinic has the answers.","Tb13cca19":"View Data Clinic Introduction","T181b2e09":"Consult with Data Clinic","T87351969":"Companies with Pebblous","T7a53b16a":"Level I Diagnosis Results","T98ad3658":"Level II Diagnostic 
Results","Tbca1fcfc":"Level III Diagnostic Results","Tq33cx8or":"From diagnosis to remediation, a general hospital for your data","Tttr3lqgs":"Level I Results","Tckcy8y72":"Level II Results","Twd35rbiv":"Level III Results","Tb1087ae3":"Data Clinic: Service Highlights","T20f3bb40":"Data Quality Improvement: Case Study","T79c1cca5":"Introducing data improvement cases provided by Data Clinic.","T8004ef7c":"Three data improvement solutions consisting of bulking, dieting, and replicating.","Tb37e6339":"Solve the data quality problems you're struggling with.","T4987f3a7":"Go to see improvement cases","Tc0908391a":"Synthetic data examples","T5624f95a":"Go to synthetic data case study","Tfed358ed":"Go to dataset","T528586240":"Please enter the dataset name you want to change","T528586241":"Dataset name is a required field.","T528586242a":"Allowed characters: letters, numbers, underscores (_) and hyphens (-) only.","T528586243":"You can enter up to {count} characters.","T528586244":"Only Korean, English, numbers, underscores (_), and hyphens (-) can be used in the name.","T635824371":"Application for diagnosis","T453631537":"Diagnosis completion","T813678657":"Diagnostic report completed","T19738ce3":"","To0prdq69":"Diagnosing","T06vr3vew":"Diagnostics complete","T5yzu8kir":"Your data diagnostic report is","Tr00a4wc7":" complete.","Tbvygzjst":"No diagnostic data submitted!","T8iau1v9c":"Get your data analyzed now!","Tyu9rsrv7":"Request Diagnosis","T6t1tc1ui":"No completed diagnostic reports available!","Tn1njwgmj":"Get your data analyzed now!","Tngzvj5yn":"Issue date","T150e782d":"Favorite datasets","T27370c11":"Diagnostic Report Favorites","Teb1529f5":"You have no favorite datasets.","Ta35a8c0d":"In your favorite datasets","Tbb4be8ec":"Please press","T540d2cc5":"View dataset sample","Tcb18847d":"There are no favorite diagnostic reports.","T48c821ba":"To your favorite diagnostic reports","Tbf487d78":"Click here to see a sample diagnostic report","T61e56dfe":"Change the dataset 
name","T277eb50c":"image","T67e828f8":"Diagnostic dataset file","Tbfd7a61a":"Diagnostic status","Tc4b1cbc9":"Here is the slider","T438705795":"My Data Clinic","T868970069":"Plan in use","T571389247":"Favorites","Tac46614d":"Existing dataset name","T64ba16cf":"The name of the dataset you want to change","T8beaf072":"View diagnostic status","Tcb6e09fe":"View diagnostic reports","T66b80f44":"Free to use","T358680a6-core-toolkit":"core toolkit","T358680a6-basic-toolkit":"basic toolkit","Tc3bed6d5-public-data":"public data","Tc3bed6d5-access-to-curated-public-data":"access to curated public data","Tc3bed6d5-full-toolkit":"full toolkit","T359b5c42":"Advanced visualization and interaction of public data","T547150129":"Advanced visualization and interaction public data","T547150129-interactive-visuals-public-data":"interactive visuals (public data)","T54619f82-get-full-reports-on-public-datasets-curated-by-pebblous-new-datasets-added-all-the-time":"Get full reports on public datasets curated by Pebblous—new datasets added all the time.","T2a5b3186-5-000-000":"5 million won","T2a5b3186-per-year":"/y","T2a5b3186":"per year","Td0405576":"up to 10 per month","T45be3db6":"Full-scale data-quality diagnostics for your own datasets—plus every feature in the Basic plan.","T6328a065":"(Up to 100,000 sheets can be diagnosed twice)","T54cc3bf6":"Advanced visualization and interaction of public + customer data","Tfc3a9ea0":"Up to 100,000 images can be diagnosed","Tfc3a9ea0-plan-info-up-to-100000-images":"Diagnose up to 100,000 images per run","T457412559-unused-diagnostic-credits-accumulate":"Unused credits roll over while your subscription is active (not transferable)","T4f45b89c":"public+ customer data","T439246253":"interactive visuals (public + customer data)","T439246253-pro-interactive-visuals":"interactive visuals (public + customer data)","T439246253-enterprise-interactive-visuals":"advanced, interactive (public + customer data)","Tc91ed976":"(Up to 1 million sheets can be 
diagnosed twice)","Tf8a9d7a2-print-report-pdf":"Print Diagnostic Report PDF","T5a802bd2-print-report-pdf-cost":"PDF ₩1.5 M /unit, printed copy ₩3 M /unit","T97f8d9e3":"Additional cost for custom project creation of up to 10,000 images/annotations: 60 million won","T633252920-bulk-up-data-cost":"generate up to 10 k images (from ₩60 M)","T365a298b-customer-data-diagnostics":"Customer-Data Diagnostics:","T388333888":"Current plan","Tc779dab1-interactive-visualisation-communication-tool":"Interactive visualisation & communication tool\n(Stand-alone version available via consultation; full web release coming soon)","T8ecfb74a":"Price","T6edbe22f":"Purchase Credits","T6edbe22f-purchased-credits":"Purchased Credits","T6edbe22f-promotion-credits":"Promotion Credits","T85838040":"Remaining Credits","T7034606a":"Plan Start Date","T38998b9c":"Plan End Date","T97dc992d":"Cancel your plan","Tc195cf97":"More possibilities, see full plan","T7059edf1":"hello !","T93df7274":"Edit my info & apply promo code","Tndvq34m0":"Available until ","Tvjss8qa7":"!","T7d414540":"A story of vision and passion","Tb46e8f2e":"Stories to help you step out of your own world after leaving your job","Tda1c6723":"2023 Data Grand Conference TRACK 2-2 Lee Ju-haeng","Td6f9cbf0":"In the AI era, improving data quality assessment strategies to enhance data utilization","T8297bf22":"Data quality assessment and improvement cases by data clinic utilization type","T2730b220":"All-in-one comprehensive data solution that increases reliability through data quality assessment","Ta5d0c8a4":"Green Data Solutions for Sustainable Artificial Intelligence","Tc03ebbf9":"Discover new possibilities","Tbaa17fbf":"Pebblous members\nconvey the truth of data.","Td2170c56":"Pebblous is a\ncomprehensive data hospital.","Ta1e939dd":"Pebblous is a deep-tech startup founded in\nNovember 2021 by AI and data researchers\nwith the mission of ‘bridging the gap between users and data.’\nBased on world-class technology\nand a sincere 
commitment to data quality,\nwe aim to grow into a global company\nthat contributes to sustainable artificial\nintelligence for humanity.","T89ef46eb":"Browse through our homepage","T22decdcc":"Pebblous Pitch Deck","T123456789":"You can see the detailed explanations of Pebblous' core competitiveness and major business models.","T987654321":"Go to the English version","T246810135":"Go to the Korean version","T5c99b6d1":"Data Clinic Brochure","T135792468":"We have summarized Data Clinic, the core product of Pebblous.","T975318642":"Go to brochure","Te00945ff":"We put data as hard as pebbles\nin your hands.","T186eb859":"all","Tvazt3278":"Enter a title for the diagnostic report.","T8313ff89":"Diagnostic Report Chart Explorer","Tf5ee7e77":"Select diagnostic report","T2gzk2j1q":"Please select.","T24b1204d":"Chart Type","T312s2qnx":"Level","T1b909021":"class","Ta0ac5fa5":"Please select the chart you want to see with Pebbly","Txvywq0gt":"Class average","T7yq76mp5":"Average of all data","T26dplfeb":"Density","Tz4gt87ao":"Maximum distance","Typui9i98":"Minimum distance","T529vz97i":"Average image distance","Tuo914fwd":"Average image of each class","T897bee97":"Top 20 and bottom 20 data by density","T441fed84":"High density sample","T0fca007e":"low density sample","T46cfe343":"suggestion","Te1e1b5b0":"Description of reference point","Tpg40005m":"Ideal for leveraging AI learning.","Tgd9a6tcy":"Improvements are possible and there's room to fix things.","Tkv6qeblo":"A relatively serious issue has been detected. 
It requires attention.","Tlpif9bjs":"Difficult to assess due to lack of diagnostic data.","T9b26ghtb":"Score","T7bb142e1":"Consult on quality improvement","T0c2a2102":"Basic EDA based","T2bfbbdcd":"General lens based","Txqffa1h0":"Measuring conformance","Tlls5vvfp":"Class balance","T7c392156":"Data-specific lens-based","Tkae6mct8":"General lens selection and imaging results","Tidiarm1n":"Geometry property observations","Tj6fv52ns":"Distribution property observations","Tazfb9z9r":"Data-specific lens selection/processing and imaging","T7b9cbf37":"lowness","T7eaf0649":"height","Talr5n852":"Good","T8wz112c6":"Fair","Telfhkolg":"Poor","Twntxqz07":"No Particular Findings","Thf1e3ceh":"Value is unknown","T3e0e939b":"View all classes","T7a921d3c":"View all","T8f36e2a4":" classes","T5b429a7d":"View all","T3f81c6e2":" classes","Tf3fddd92":"Diagnostic Results - Basic Diagnosis","T486869b6":"Data integrity","T2a8afe51":"Image size","Tdd617b7b":"Image Channel","Tabcdd65f":"Label Consistency","Ta7756f58":"Check for missing values","Tcd23b524":"Measuring class balance: number of data per class","Te6300535":"Class name","T7f64998c":"Number of data (training criteria)","Tbc264751":"average","T6f0b8a78":"Standard Deviation","Td0a23975":"Statistical Measurement","T95f63396":"Overall Statistics","Tbf28a0f4":"Overall average image","Te287a153":"The overall average image can be used to gauge the overall trends in color, shape, and pattern of the dataset.","T7d09b40c":"Pixel histogram of the overall average image","T37ad5bf2":"You can find out the distribution characteristics of each color channel of the entire average image.","T91e296ac":"Class Statistics","Tbb5c1100":"Average image by class and","T240430c8":"Average histogram","T82aea2ba":"Class-wise average images can be used to gauge class-level tendencies in color, shape, and pattern in a dataset. 
Comparing to the overall average image or to other classes can also reveal class specificity.","T85dd5243-apply-datalens":"Apply DataLens","Td6629fc4":"Select Data Lens","T5c4dbfd2":"Base neural network","Tf6cf6e6a":"View Link","Tfbe994a1":"Observation Resources","Td1f7dee9":"Observing geometric properties","Te6f9d6c4":"Macroscopic property observation","T6bf7457a":"Overall data distribution","T139e5442":"This is a 2D PCA result for visualizing the high-dimensional imaging results obtained by DataLens. In the chart, the origin is the origin vector value in the imaging space, and the mean image feature is the vector value of the entire average image imaged by DataLens. The higher the diversity of the image, the greater the distance between the mean feature and the mean image feature.","Tt2l5tmru":"Full data distribution chart","Tc800537a":"Manifold shape measurement (I) Macroscopic","T33b28a40":"The data imaging results are observed as a manifold in a multidimensional space. The horizontal axis represents representative classes. The vertical axis is the average of the magnitude (norm) of the feature vectors belonging to the class, which corresponds to the average distance from the origin. The minimum/maximum distance from the origin is displayed together to estimate the overall size of the manifold and the specificity of each class. The average image of highly diverse data is not similar to any image in the dataset, so it usually exists outside the minimum/maximum interval. However, since the data lens used for level II diagnosis is domain-neutral, the average image usually also exists in the minimum/maximum interval.","T72fc7608":"Observation of local properties","T465e3327":"Distance-based similarity measurement","T9c4f19cc":"This is the result of distance-based similarity search for representative images by class. For example, for a given data, it extracts the 10 closest and 10 furthest data and shows them. 
This allows you to identify local singularities within the dataset. This helps identify outliers and duplicate images within the dataset.","T3fefd4b9":"10 closest data points: Based on total data","Ta6c44fb8":"10 furthest data points: Based on all data points","T1bf495c4":"10 closest data points: based on same class","T0636b1fa":"10 most distant data points: Same class criteria","T3a2b2d99":"Density Measurement (I)","T0ada5ba7":"The density is calculated by calculating the distance between adjacent data for each data on the multidimensional manifold, which is the result of data imaging. The density is higher when there are more other data around a specific data, and the density is lower when there are fewer. Data with high density are more likely to be duplicates, and data with low density are more likely to be outliers. Density is visualized through two-dimensional PCA, not observation dimension. At this time, the darker the red, the higher the density of the data. In the case of density measurement by class, a total of 12 classes representing the distribution of density are selected and the results are displayed.","T63267411":"Density chart: total data (calculated from observation dimension and visualized in two dimensions)","T52ea72e8":"Density chart: by class","T40aea8ba":"Distance-density measurement","T98d95f55":"The shape of the multidimensional manifold as a result of data imaging and the density of each data are shown together. The horizontal axis is the distance from the origin of each feature vector, and the vertical axis is the density of the corresponding data. The distance-density chart of a dataset with a good distribution of distance-density measurement results for various data has a single feather shape. Therefore, it is also called a feather chart. 
Usually, similar/redundant data are located in the dense area at the top of the feather.","T15cc04d8":"Density distribution over space","Te686578d":"Density distribution by class space","Tdd5fe9d4":"Density Measurement (II)","T483b9ff5":"Similar to density measurement (I), but adds contour lines so that the density distribution can be observed together with the macroscopic distribution of the data. When viewed together with density, clusters in the macroscopic distribution can be more easily detected.","Tecf4dcee":"Data density line: full data","T6ec7cd1c":"Data density lines: by class","T4f9a57d6":"Observation of distribution properties","T989f41fe":"Observe statistical properties","T5c7ddff0":"Manifold shape measurement (II) Statistical","Tb9cb29ab":"The data imaging results are statistically observed in the manifold of the multidimensional space. The horizontal axis represents the distance from the origin for each vector value, and the vertical axis represents the frequency of each vector value. The graph indicated by the dotted line represents the mean frequency of the corresponding class. This allows us to understand the distribution of a specific class in the manifold. Four charts are shown centered on the reference point: (1) distance from the origin of the manifold, (2) distance from the origin by class, (3) distance from the center of the data, and (4) distance from the center of the data by class.","Tf0cf3735":"Distributional shape of the manifold: (1) Distance from the origin","T31839b7d":"Distributional shape of the manifold: (2) Distance from the origin by class","T5b0f87cc":"Distributional shape of the manifold: (3) Distance from the center of data","Td10e6b79":"Distributional shape of the manifold: (4) Distance from the center of data by class","T74f821b0":"Density Measurement (III) Distributional Properties","T070a40a9":"The distribution of the density of each data is shown in two charts. 
First, in the histogram chart, the horizontal axis represents the density value, and the vertical axis represents the frequency of the density. The histogram can help you understand the overall density distribution of the dataset. It is especially helpful for understanding the distribution of outliers such as edge cases. The second chart shows the density distribution for the representative class in a box-whisker chart. The representative classes are arranged in order of density, and can be compared with the average density. If you improve the data quality (bulk up/diet) in the future, you can also see that the density distribution improves.","Tab24b57d":"Density histogram: full data","Teef36d0b":"Density histogram: by class","Tac2424c2":"Density Box Chart","Ta7b1039b":"Special sample examples","T4fabba30":"A singular sample from a density perspective","T81dcc1c6":"After the quality diagnosis, we show you some outliers that you should look at again with domain knowledge. First, we show outliers in terms of density. We show you the 20 most dense and 20 least dense samples in the entire distribution and by class. High-density samples are likely to be similar/duplicate data and will be the target of data diet in the future. Low-density samples are outliers. 
Depending on the target task, you may need to keep them as edge cases, remove them as outliers, or bulk up by adding data to the surrounding area to increase the density.","T3b758a43":"Top 20 and bottom 20 data by density by class","T19940b69":"Custom DataLens","Tc4400fd1":"Data lens processing","Tcff83855":"Observation dimension","T313ab36f":"Lens processing type","T4f9a57d6b":"Task-specific metrics","T961596170":"Frequency (unit: images)","T180224162":"Pixel value","Tae6bkob1":"Distance from the origin","T5ffdbae8":"Average distance from origin","T5ffdbae0":"Average image","Tf91e1ef1":"Diagnostic Level I Summary","T612d4879":"Diagnostic Level II Summary","T2ed00c69":"Diagnostic Level III Summary","T6e83d3a7":"Measuring Data Integrity","T76bd175b":"Missing Value Measurement","Tae720178":"Class Balance Measurement","T7da70130":"Overall average pixel histogram","Tc4109d02":"Statistics by class","Tf3956b94":"Data Lens Selection and Imaging","T27f0caab":"Select a data lens","Td1f7dee9b":"Deep geometry","T4f9a57d6a":"Review distribution","T4af351dc":"Observe statistical properties","T59e14503":"Data Lens Processing/Selection and Imaging","T3958769e":"Diagnostic Report Guide","T469685708":"Overall density chart","Tfb04b7b8":"Data quality assessment and improvement for the AI era","T141f7876":"Pebblous Diagnostic Report's unique features","T52170889":"Using unique data lens technology","T47ab74f9":"After converting data into a form that can be observed and measured using our self-developed DataLens, we analyze the quality of the data using multifaceted methods such as geometry and statistics. 
Depending on the customer's needs, we diagnose by applying the optimal DataLens, from ready-made to customized.","Td375ba0a":"Providing customized diagnosis","T1c4715d6":"From traditional Exploratory Data Analysis (EDA) methods to new technologies like DataLens, we provide optimal diagnostics by considering the characteristics of the data and work, as well as the customer's business objectives.","Te56743c9":"Intuitive and scientific report","T5eb2226e":"Diagnosis at each level provides a variety of charts, diagrams, examples, and explanatory texts to help you understand the multifaceted characteristics of the data more easily and intuitively. The results at each level are summarized and provided in the final results. If necessary, suggestions are made to improve the quality of the dataset.","Tf2f72c23":"Identify problems with actual data through diagnostic reports","Tb7667ad8":"View diagnostic report cases","T80f45dd0":"Customer Communication","Tb486da20a-define-objectives":"Define objectives","Te9ko13xa":"It is a key element that determines the direction of data quality diagnosis. By clearly setting the purpose and goal of the diagnosis, it helps to derive efficient diagnostic methods and ultimately helps the diagnostic results lead to practical business performance.","Te3071d78a":"Confirm tech needs","Tiza8rs4u":"This is related to the tools used in the actual diagnosis (e.g., algorithm, lens model, etc.), platform, system performance, and processing ability. We design and provide the optimal diagnostic solution for your system infrastructure and technical environment.","T84f1fce8":"Budget and Timeline","Tj3ndvnko":"This is a matter of efficiently distributing project resources such as input budget. 
Check the diagnostic range and the details of each stage so that all steps can proceed without disruption within the schedule.","T5a918063":"Comprehensive Evaluation","T3be99686":"Summary of diagnosis","T5f7a7091":"Write a quality improvement proposal","T362947185":"Considering the comprehensive evaluation opinions on data quality, taking into account the results of the level I, II, III diagnosis and customer communication. It mentions both neutral quality and work -specific quality.","TxQ2f7D8pL":"Write a diagnostic report by integrating each analysis result of the diagnostic process introduced earlier. Diagnostic complaints include detailed information on the overall quality, geometric and distribution characteristics of the dataset, the problems and potential values found. In this report, the results of each stage are systematically organized, and the various aspects of the data are widely viewed from the basic characteristics of the data to the advanced characteristics. This clearly understands the quality of the dataset and establishes an efficient data management and utilization strategy. As a result, it contributes to effective big data analysis and artificial intelligence learning.","TpK7x9Zn3E":"Most of the datasets have more duplicate data than you think. Duplicate data is recommended to solve data redundancy because it causes bias in model learning and inefficiency in storage and computational resources. It is also necessary to adjust the amount of data used according to the AI model development process. For example, in the early days, it is more efficient to reduce the size of the data, rather than using the entire dataset for learning. Minimize the use of GPU resources and make it possible to reduce the overall development cycle by enabling fast learning speed.","TaB5c8Gj2H":"Class -based optimization is the advanced procedure of the data diet. 
By default, diets at density of the entire data, but furthermore, the data distribution between each class or each class is carefully analyzed to select the data target data. In other words, it emphasizes the core characteristics of each class, while maintaining the distinction between classes. This allows the AI model to recognize each class faster and more accurately and to maximize the performance of the model. It can be applied to the general tasks besides the data diet distinguished.","Tf6Q1m3R7s":"Data bulk up means adding synthetic data in common terms. However, the Pebblous Data Clinic adds synthetic data with precision burning. In other words, the diagnosis is created through various techniques that find the parts that are relatively lacking in data and create synthetic data. Not only can it improve the distribution quality of the data neutral, but also applicable from a task -specific point of view. For example, bulk -ups relieve the imbalance between classes and help the model to better distinguish each class. It also contributes to strengthening the generalization of the AI model.","Tn9W7vZ2k5":"It maintains the original data and statistical and distribution characteristics, but it is a virtual data separate from the source data, so it provides a variety of analysis and the possibility of using data without the risk of exposure of sensitive data. By utilizing data replica, it contributes to accelerating data -based decision -making processes by enhancing data accessibility and greatly improving the target range and utility of analysis.","T7j8L3q5Y6":"Mentioned the main results for diagnostic items of Measuring data Integrity, Missing value measurement, Class balance measurement, and Statistical measurement.","Td2P9c7X4e":"Among the data lenses that Pebblous have been referred to, we select the characteristics of the target data and the lens that are most suitable for tasks. 
The characteristics of the data lens, the results of the imaging, the characteristics of the multi -dimensional geometry, the distribution characteristics of the statistical point of view.","Tg4V6b1M9s":"Instead of ready -made data lenses, summarize the results of the diagnosis with a special data lens created in consideration of the characteristics and tasks of the target data. Diagnosis is the same as level II.","Th5Y2x8N3z":"Measuring Data Integrity, Missing Value Measurement,","Tk3L8t1C7q":"Data matching evaluation is the first step to ensure the basic quality and reliability of the dataset. In this step, we review the accuracy of metadata, such as the format and size of the dataset, and evaluate the completeness and consistency of the dataset. In particular, if the wrong format or stretch is mixed, it will capture the impact on the dataset, and perform the necessary conformity improvement for the next step.","TqR5zX8pKm":"In order to ensure the accuracy of all diagnosis in the future, data consistency measures are performed. This measurement focuses on the basic information of the target dataset to check the overall perfection, consistency and accuracy.","Tp7bZc2Lqw":"When the image size of the dataset is consistent, it is easy to process and is also efficient for model learning. If the image size is different, the characteristics of the original data can be distorted in the process of adjusting the image to the same size, which can negatively affect the performance of the model. Therefore, it is necessary to make sure that all images have the same resolution. This process is the basis of the quality improvement of the data, reducing the complexity of the data pretreatment process.","Ta3Jd9FrXs":"The number of channels in the image determines how the image expresses color information. In general, black and white images use 1 channel and color image 3 channels (RGB). For model learning, all images in the dataset must have the same number of channels. 
If the number of channels is different, an error may occur when the image data is interpreted, which may reduce the consistency and accuracy of the model in the learning process. Even if the number of channels is the same, it may be different, so attention is required. (e.g., RGBA, CMYK)","T1134c45b":"Utilizing lightweight datasets","Tk8nH5Gp2Y":"The label is a tin information that describes the properties of the image and is used as a reference point for learning the model in the guidance learning. Therefore, the accuracy of the label is an important factor that determines the performance of the model. Since the label error can be directly connected to the error of the learning results, it is very important in the data pretreatment process to make sure it is labeled according to the work features. The criteria for determining the accuracy of the label depend on the tasks, such as sorting, detection, and split work.","Tj6aV4Rq7B":"Missing tests are the stage of evaluating the integrity of the data. First, identify the context of why the missing value occurs, and then confirm the effect of the side value on the analysis results. After that, we identify the pattern and distribution of the side of the side to determine and propose the appropriate data processing method. This step allows you to understand the characteristics of the dataset more deeply and effectively manage missing information.","Tl1mD8cN3w":"Missing value measurement is conducted to evaluate the integrity of the data. To do this, we compare the number of data before and after and then after applying the rules of the conclusion processing rules (for example, the side of the side). If there is a difference in the number of data before and after the pre -treatment of the side, there is a high possibility of loss or error in the original dataset.","Ts6Ym2J9vE":"Class balance measurement is a step to assess the distribution of classes between classes in the dataset. 
First, identify the frequency of data for each class to identify the imbalance, and if necessary, the data set is balanced by applying the data reassurance or sampling technique. This step can be prevented from the deflection of the model and improved by improving the performance, which can ensure the accuracy and reliability of the analysis results.","Tg4iP1sW7c":"Only for datasets with a pre -defined class, the balance measurement by class according to the number of data is performed. To do this, check the statistical information of the training datasets and test datasets for each class.","T5b4fc0b2":"Class-level optimization","Tf9oQ6tB1v":"Statistics measurement is a statistically summary of major indicators that show all dataset. The main indicators include the total number of data, the range of data values, and the type of distribution, and also includes basic statistics that are essential for understanding the data. Through this step, it identifies the scope and diversity of the dataset and detects the above value, and identifies the statistical characteristics of the dataset in advance to perform the diagnosis of the future and facilitate interpretation.","Th2wE5zK0n":"Analysis is performed to identify the statistical characteristics of the entire dataset.","Tb3kR4pA6x":"Analysis is performed to identify the statistical characteristics of each class dataset.","Tu8xZ5mS7d":"The average image for each class allows you to gauge the class unit trend for the color, form, and pattern of the dataset. Compared to the total average image or other classes, you can also see the specificity of the class.","Tz8Ph5A7Lp":"The selection and imaging of datasens is to analyze the properties of the dataset by selecting a suitable neural network model, a lens. First, select the existing model that accurately reflects the structure of the dataset and can extract the important characteristics properly. 
Based on this, we measure the geometric properties of the dataset, such as density and distance, and analyze the complexity and diversity of the data. This step allows you to identify the basic structure and pattern of the data, and draw a sketch for establishing subsequent analysis and modeling strategies.","Tk9X4D7V3n":"In order to derive the most efficient analysis results, a large -scale dataset of various domains is used to select a model that best reflects the characteristics of the data among the pre -learned deep learning neural network models. Examples of existing neural networks include Lenet, Resnet-101 (Resnet-101), and Vision Transformer.","T4d151e34":"Data Bulk-up","Th7Ys2B8q1":"It is the dimensional size of the feature vector printed through the layer of the selected neural network model. This value is the result of the most appropriate optimization of the dimensions that do not lose the complexity and diversity of the dataset. However, the data lens of the level II uses the dimensions of the existing neural network, so it is relatively high for the target dataset.","Tp3B9v8C2q":"Extract the feature vectors by passing the entire dataset to the existing neural network. At this time, the characteristic vector can accurately reflect the structure of the source data, and then Impeded the values in the manifold in the imaging space and measure the distance and density from the geometric point of view. At this time, the dimensional size of the imaging space is the same as the observation, and the value of each feature vector is one -on -one response to the original data point. Therefore, in order to interpret what the thousands of vector values of hundreds of hundreds of dimensions in the manifold space are meant, the structural step must be essential. 
In this report, there are two main methods, that is, geometric properties and distribution properties.","Tq5F3M7s4R":"The geometric property observation is a step of visualizing and observing how far each point in the dataset is in a high -level space. In this case, the two -dimensional PCA visualizes the geometric trend of data that is not revealed in Level I, such as manifold shape in multi -dimensional space and tendency to cluster local clustering. Through this step, you can assess the geometric complexity of the dataset, and clearly grasp the hidden patterns and structural characteristics.","Tx2K7g9H6p":"Observe the overall structure of the feature vectors and the distribution in the multi -dimensional space. Based on this, you can identify the main geometric characteristics and tendency of the dataset. The optimal observation that calculates the distance and density is still high for visualization, so it is possible to reduce the dimensions by reducing the dimensions using the two -dimensional PCA technique to identify the characteristic vector values at a glance.","Tb5L1p8Z3n":"This is a two -dimensional PCA result to visualize the high -level imaging results obtained with the data lens. In the chart, the origin point (Origin) is a vector value in the imaging space, the average image feature in the imaging space. The higher the diversity of the image, the greater the distance of the average and average image features.","Tw7G6q9M2z":"Observe data imaging results in manifolds in multidimensional space. Horizontal axes are representative classes. The vertical axis is the average of the size of the characteristic vectors in the class, which corresponds to the average of the distance from the origin. The minimum/maximum distance from the origin is displayed together to assess the entire size of the manifold and the specificity of each class. 
The average image of high diversity data is not similar to any image of the dataset, so it is generally only outside the minimum/maximum section. The data lenses used for level II diagnosis are domain neutral, so the average image is usually present in the minimum/maximum section.","Tj9S2h4P6e":"In the topical property measurement, the properties of the individual feature vector values are analyzed in more detail. For example, you can find exception data samples that look like outliers.","Tr3W8n7L5c":"Distance-based similarity search results for representative images by class. For example, you can draw 10 closest or farthest data for a given data. This allows you to identify the topical specificity inside the dataset. This helps to identify the overlapping and duplicate images present in the dataset.","TAb9cFdR7n":"In the multidimensional manifold, which is the result of data imaging, the density is calculated by calculating the distance from each data to the data. The more different data around the specific data, the higher the density, the lower the density. Dense data is likely to be duplicate, and low density data is likely to be an overlapping. Visualization of density is visualized through a two -dimensional PCA, not an observation. The deeper the red color, the more density of the data. In the case of a density measurement by class, a total of 12 classes representing the distribution of density are selected to show the results.","TVx5iQs6Lp":"It is similar to density measurement (I), but adds contours so that the distribution of density can be observed with the macroscopic distribution of the data. With the density, the cluster of the macroscopic distribution makes it easier.","TWk2zHr9Ge":"Distributable attribute observation is a statistically observed step by visualizing how each point in the dataset is scattered in a high -level space. 
At this time, histogram, etc., visualizes the macroscopic trend of density, distribution range, coverage, and bias between data points. Through this step, you can understand the overall distribution tendency of the dataset, and specify the various patterns related to this to ensure the essential evidence for data modeling and prediction strategies.","T17b8bc8a":"Enhanced class distinction","TQw1fYz4Rj":"The data imaging results are statistically observed in the manifold in the multidimensional space. The horizontal axis represents the frequency of each vector value, the distance from the origin point for each vector value. At this time, the graph marked by the dotted line indicates the flat frequency of the class. This allows you to understand the distribution of a specific class in the manifold. It shows four charts around the reference point. (1) distance from manifold origin, (2) distance from (3) data center at classes by class, (4) distance from data center","TGp7kSv2Xl":"It shows the distribution of each data density in two charts. First, in the histogram chart, the horizontal axis is density, the vertical axis is the frequency of density. The histogram allows you to understand the overall density distribution of the dataset. In particular, it helps to understand the distribution of the same level as an edge case. The second chart shows the density distribution of representative classes on the box (BOX-WHISKER) chart. Representative classes are arranged in the order of density and can be compared with average density. In the future, if you improve data quality (bulk up/diet), you can also see that the distribution of density is improved.","TUc9bNm5Df":"In the entire dataset that does not take into account the class, the highest density samples and the lowest samples are detected each, and each class repeats this task repeatedly. 
Based on this, you can effectively check the data corresponding to similar or overlapping data and overlapping.","TBx8jVa3Zq":"After the quality diagnosis, it shows the unusual samples you need to take a look at the domain knowledge. First of all, it is a peculiar sample of the density point of view. In the entire distribution and the most density for each class, it shows 20 low samples. Dense samples are more likely to correspond to similar/duplicate data and will be subject to data diet later. Low dense samples are unusual samples. Depending on the goal work, bulk -ups may be required to maintain an edge case, remove outlocks, or add data around to increase density.","TNk4sHt7Py":"In addition to selecting the appropriate dataset to capture the complex characteristics of the large dataset, the data imaging is carried out after processing the lens to the dataset. In particular, in the case of level III, it is diagnosed with improvement such as synthetic data creation (data bulk up), redundant data removal (data diet), and reproduction data creation (data replica) for safe data distribution. If you need to improve, you will use your own lens to create synthetic data immediately based on imaging results.","TYz2xFb3Mc":"Observation is a dimension of the feature vector output through the layer of the selected neural network model. This value is the result of the most appropriate optimization of the dimensions that do not lose the complexity and diversity of the dataset. Unlike level II, the data -specific lens of the level III is processed in accordance with the characteristics of the target dataset, so you can exclude unnecessary elements (for example, backgrounds in images) and capture the target objects required for work. Since a feature vector that does not reflect the inherent characteristics of the data is removed, it can be observed in a much less reduced level of level II. 
Using the level III's data lens, the feature vectors that reflect the inherent characteristics of the input dataset are the minimum dimension, which is called the intrinsic dimension. At Pebblous, we develop and use our own patented technology to derive the intrinsic dimensions.","T4kP9rL8s2":"Select the datalens type by comprehensively considering the purpose of learning, applying the model, and the characteristics of the data. In particular, the method that best suits the structure and complexity of the dataset is applied.","Ta9Qd7RfK3":"In this step, we visualize the geometric properties of the dataset obtained through the data custom lens to observe more in -depth. In Level II, which uses existing neural networks, we examine more precisely interrelationship between data points, spatial placements, and complex structures in datasets. Through this step, you can establish a more advanced data analysis strategy by identifying geometric complexity and patterns due to the inherent attributes of data.","Td40b12ad":"Improved accessibility and analysis efficiency","Tp9zN6hF4q":"In this step, we visualize the distribution properties of the dataset obtained through the data custom lens to observe more in -depth. Similarly, we will examine more precisely the density, dispersion, and distributed characteristics between data points that are difficult to identify in Level II. 
Through this step, you can accurately recognize the distribution complexity and diversity of the dataset to establish a more sophisticated data modeling strategy.","T8620e1e1":"Level I Diagnostic Summary","T5434916d":"Level II Diagnostic Summary","Te02dade2":"Data Lens","T2e61ca83":"Imaging","T21cc75cb":"Level III Diagnostic Summary","Tae720178a":"Class Balance","Tc0dd3ddb":"Class Balance Measurement, Statistical Measurement","T9adac293":"Measuring class balance: number of data per class","T26cb3e02":"Average image and average histogram by class","Td1f7dee9a":"Analyze geometry","T79c53761":"Imaging neural network","T34ea5faf":"Feature Extraction","T091a9df4":"Selecting an existing neural network","Tef7d0a95":"Observation of distributional properties","T1519198f":"Data Specific","Tfc1ebc6b":"Intrinsic dimension","Tb55279cc":"Generative neural networks","T4005f571":"Data Lens Processing/Selection","T9c2b417f":"Data Diagnostic Report User Guide","Tca8b8af3":"We will explain in detail the diagnostic procedures of the data clinic and the composition of the diagnostic report.","T83cc89d4":"Quality issues in AI learning data from various perspectives","Ta77034d2":"Intuitively delivered data quality analysis report","T0d461ff9":"We provide data quality analysis reports, including comparison of two datasets and data life cycle management.","Tf7575951":"Helps you understand data with scientific analysis and easy-to-understand explanations based on examples and charts.","Tzfidoac6":"Pebblous Data Clinic provides a baseline quality status of your data, a before and after comparison,","T2d784842":"Provided by Data Clinic","T0cabb80c":"Introducing various data quality analysis reports.","T082672be":"Data Quality Diagnostic Report","Tcd5644c6":"Comprehensive information on AI learning datasets","Tcd84da6f":"Quality diagnosis results and improvement measures","T5e219200":"This is a report containing:","Tf29b4e15":"Data Quality Improvement Report","T9ca2e292":"Data based on data 
quality diagnosis results","T5dfa8088":"Quality improvement process such as diet and bulking up","Tb3002b42":"Here is a report explaining the effects.","T87085b07":"Data Quality Comparison Report","Tb20a4b7c":"The quality of two datasets, such as similar datasets,","T7dffcb30":"This is a report that provides a detailed comparative evaluation.","Te942faaa":"Data Lifecycle Management Report","Td32cc28f":"Data quality changes over time.","Tee9a07c8":"Market conditions, new regulations, changes in technology, etc.","T24eedd6a":"Considering the current state of the data and the future","T1d5ab9f7":"Suggest a collection plan.","Tb3eu6dtv":"Before and after quality improvement, learning/test data","T11a97557":"Level I Diagnosis","T054f77e7":"Level II Diagnosis","Te317efbe":"Level III Diagnosis","T14718f13":"Structure of a diagnostic report","T577f985c":"Pebblous Data Clinic","Tb5410a97":"","Tc4a4d0ee":"To all the customers who visited us","T1efb5f25":"Thank you.","T4c9e5f0f":"A brave new world is unfolding through the interaction of data and artificial intelligence.","Tb7a69523":"It is a well-known fact that good data makes good artificial intelligence.","Tf400852e":"The domestic Data Basic Act, which came into effect in 2022, also emphasizes the importance of data quality and seeks to foster the data industry.","Tcb132d17":"Pebblous Data Clinic is a proprietary technology that uses AI and for AI.","T4f71bea6":"We aim to contribute to the data/AI industry by improving the quality of artificial intelligence learning data.","T5f28d78e":"I'll explain in more detail below.","T21badfd5":"Pebblous Recommended Diagnostic Report","T541133c8":"Find the diagnostic report you want","Tb486da20":"Customer's business goals","Tk3Ld7a5Us":"It is a key element that determines the direction of data quality diagnosis. 
By clearly setting the purpose and goal of the diagnosis, it helps to derive efficient diagnostic methods and finally help the diagnostic results can lead to practical business performance.","Tk3Le8b6Vt":"This is related to the tools used in the actual diagnosis (eg algorithm, lens model, etc.), platform, system performance and processing ability. Designed or provided the optimal diagnostic solution for your system infrastructure and technical environment.","Tk3Lf9c7Wu":"This is a matter of efficiently distributing project resources such as input budget. Check the diagnostic range and the details of each stage so that all steps can proceed without disruption within the schedule.","Te3071d78":"Technical Requirements","Tk3Lg0d8Xv":"Considering the comprehensive evaluation opinions on data quality, taking into account the results of the level I, II, III diagnosis and customer communication. It mentions both neutral quality and work -specific quality.","Tk3Lh1e9Yw":"Write a diagnostic report by integrating each analysis result of the diagnostic process introduced earlier. Diagnostic reports include detailed information on the overall quality, geometric and distribution characteristics of the dataset, the problems and potential values found. In this report, the results of each stage are systematically organized, and the various aspects of the data are widely viewed from the basic characteristics of the data to the advanced characteristics. This clearly understands the quality of the dataset and establishes an efficient data management and utilization strategy. As a result, it contributes to effective big data analysis and artificial intelligence learning.","Tk3Li2f0Zx":"Most of the datasets have more duplicate data than you think. Duplicate data is recommended to solve data redundancy because it causes bias in model learning and inefficiency in storage and computational resources. It is also necessary to adjust the amount of data used according to the AI model development process. For example, in the early days, it is more efficient to reduce the size of the data, rather than using the entire dataset for learning. Minimize the use of GPU resources and make it possible to reduce the overall development cycle by enabling fast learning speed.","Tk3Lj3g1Ay":"Class -based optimization is the advanced procedure of the data diet. By default, diets at density of the entire data, but furthermore, the data distribution between each class or each class is carefully analyzed to select the data target data. In other words, it emphasizes the core characteristics of each class, while maintaining the distinction between classes. 
This allows the AI model to recognize each class faster and more accurately and to maximize the performance of the model. It can be applied to the general tasks besides the data diet distinguished.","T84f1fce8a":"Set budget & timeline","Tk8L9t2C7q":"Data bulk up means adding synthetic data in common terms. However, the Pebblous Data Clinic adds synthetic data with precision burning. In other words, the diagnosis is created through various techniques that find the parts that are relatively lacking in data and create synthetic data. Not only can it improve the distribution quality of the data neutral, but also applicable from a task -specific point of view. For example, bulk -ups relieve the imbalance between classes and help the model to better distinguish each class. It also contributes to strengthening the generalization of the AI model.","Tk5L7t3C7q":"It maintains the original data and statistical and distribution characteristics, but it is a virtual data separate from the source data, so it provides a variety of analysis and the possibility of using data without the risk of exposure of sensitive data. By utilizing data replica, it contributes to accelerating data -based decision -making processes by enhancing data accessibility and greatly improving the target range and utility of analysis.","Tk6L3t7C7q":"Level I Diagnosis is the stage of performing the most basic EDA. In this step, we will conduct data matching assessments, survey tests, class balance measurements, and statistical measurements. In this stage, we will do our work to satisfy the prerequisites necessary for the level II and III progress.","Tk3L2t8C7q":"Data matching evaluation is the first step to ensure the basic quality and reliability of the dataset. In this step, we review the accuracy of metadata, such as the format and size of the dataset, and evaluate the completeness and consistency of the dataset. 
In particular, if the wrong format or stretch is mixed, it will capture the impact on the dataset, and perform the necessary conformity improvement for the next step.","Tk9L1t9C7q":"In order to ensure the accuracy of all diagnosis in the future, data consistency measures are performed. This measurement focuses on the basic information of the target dataset to check the overall perfection, consistency and accuracy.","Tk5L0t0C7q":"When the image size of the dataset is consistent, it is easy to process and is also efficient for model learning. If the image size is different, the characteristics of the original data can be distorted in the process of adjusting the image to the same size, which can negatively affect the performance of the model. Therefore, it is necessary to make sure that all images have the same resolution. This process is the basis of the quality improvement of the data, reducing the complexity of the data pretreatment process.","Tk6L9t1C7q":"The number of channels in the image determines how the image expresses color information. In general, black and white images use 1 channel and color image 3 channels (RGB). For model learning, all images in the dataset must have the same number of channels. If the number of channels is different, an error may occur when the image data is interpreted, which may reduce the consistency and accuracy of the model in the learning process. Even if the number of channels is the same, it may be different, so attention is required. (e.g., RGBA, CMYK)","Tk7L8t2C7q":"The label is a tin information that describes the properties of the image and is used as a reference point for learning the model in the guidance learning. Therefore, the accuracy of the label is an important factor that determines the performance of the model. Since the label error can be directly connected to the error of the learning results, it is very important in the data pretreatment process to make sure it is labeled according to the work features. 
The criteria for determining the accuracy of the label depend on the tasks, such as sorting, detection, and split work.","Tk8L7t3C7q":"Missing tests are the stage of evaluating the integrity of the data. First, identify the context of why the missing value occurs, and then confirm the effect of the side value on the analysis results. After that, we identify the pattern and distribution of the side of the side to determine and propose the appropriate data processing method. This step allows you to understand the characteristics of the dataset more deeply and effectively manage missing information.","Tk7R2x9P3m":"Measurement measurement is performed to evaluate the integrity of the data confirmation data. To do this, we compare the number of data before and after and then after applying the rules of the conclusion processing rules (for example, the side of the side). If there is a difference in the number of data before and after the pre -treatment of the side, there is a high possibility of loss or error in the original dataset.","Tk5N6y2D8k":"Class balance measurement is a step to assess the distribution of classes between classes in the dataset. First, identify the frequency of data for each class to identify the imbalance, and if necessary, the data set is balanced by applying the data reassurance or sampling technique. This step can be prevented from the deflection of the model and improved by improving the performance, which can ensure the accuracy and reliability of the analysis results.","Tk9Z4x1Y6c":"Only for datasets with a pre -defined class, the balance measurement by class according to the number of data is performed. To do this, check the statistical information of the training datasets and test datasets for each class.","Tk2L7m3F9p":"Statistics measurement is a statistically summary of major indicators that show all dataset. 
The main indicators include the total number of data, the range of data values, and the type of distribution, and also includes basic statistics that are essential for understanding the data. Through this step, it identifies the scope and diversity of the dataset and detects the above value, and identifies the statistical characteristics of the dataset in advance to perform the diagnosis of the future and facilitate interpretation.","Tk3D8q2W5j":"The overall average image allows you to gauge the overall tendency of the color, form, and pattern of the dataset.","Tk9R5x2B6m":"Level II diagnosis","Tk2M7k4F5p":"The selection and imaging of datasens is to analyze the properties of the dataset by selecting a suitable neural network model, a lens. First, select the existing model that accurately reflects the structure of the dataset and can extract the important characteristics properly. Based on this, we measure the geometric properties of the dataset, such as density and distance, and analyze the complexity and diversity of the data. This step allows you to identify the basic structure and pattern of the data, and draw a sketch for establishing subsequent analysis and modeling strategies.","Tk5H8q3N6r":"To capture the multi -dimensional characteristics of the data more precisely, select the appropriate data lens and proceed with the data imaging.","TabcDeFGh1":"In order to derive the most efficient analysis results, a large -scale dataset of various domains is used to select a model that best reflects the characteristics of the data among the pre -learned deep learning neural network models. Examples of existing neural networks include Lenet, Resnet-101 (Resnet-101), and Vision Transformer.","T1aBcDeFgH":"It is the dimensional size of the feature vector printed through the layer of the selected neural network model. This value is the result of the most appropriate optimization of the dimensions that do not lose the complexity and diversity of the dataset. 
However, the Level II data lens uses the dimensions of the existing neural network, so the dimension is relatively high for the target dataset.","Tx5FgHiJkL":"Extract the feature vectors by passing the entire dataset through the existing neural network. The feature vectors accurately reflect the structure of the source data; they are then embedded in the manifold in the imaging space, where the distance and density are measured from a geometric point of view. The dimensional size of the imaging space is the same as the observation, and each feature vector corresponds one-to-one to an original data point. In this multi-dimensional space, it is difficult to directly observe the feature vectors and the density calculated from them. Therefore, a structuring step is essential in order to interpret what the thousands of vector values with hundreds of dimensions in the manifold space mean. This report uses two main approaches: geometric properties and distribution properties.","T9qRsTuVwX":"Geometric property observation is the step of visualizing and observing how far apart the points in the dataset are in a high-dimensional space. Here, the two-dimensional PCA visualizes geometric trends in the data that are not revealed in Level I, such as the manifold shape in multi-dimensional space and the tendency to form local clusters. Through this step, you can assess the geometric complexity of the dataset and clearly grasp its hidden patterns and structural characteristics.","TpQrStUvWx":"Observe the overall structure of the feature vectors and their distribution in the multi-dimensional space. Based on this, you can identify the main geometric characteristics and tendencies of the dataset. 
The optimal observation used to calculate the distance and density is still too high-dimensional for visualization, so the dimensions are reduced using the two-dimensional PCA technique to identify the feature vector values at a glance.","TjK4mN5pQr":"In the local property measurement, the properties of the individual feature vector values are analyzed in more detail. For example, you can find exceptional data samples that appear to be outliers.","Te5FgH6iJk":"Distance-based similarity search results for representative images by class. For example, you can retrieve the 10 closest or farthest data points for a given data point. This allows you to identify the local specificity inside the dataset. This helps to identify the outlier and duplicate images present in the dataset.","TlM7nO8pQr":"In the multidimensional manifold, which is the result of data imaging, the density is calculated from the distances between each data point and the other data points. The more data points there are around a given data point, the higher its density; the fewer there are, the lower its density. High-density data is likely to be duplicated, and low-density data is likely to be an outlier. Density is visualized through a two-dimensional PCA rather than the observation. The deeper the red color, the higher the density of the data. In the case of a density measurement by class, a total of 12 classes representing the distribution of density are selected to show the results.","Tp5R7z2S8x":"Distribution property observation is the step of statistically observing, through visualization, how the points in the dataset are scattered in a high-dimensional space. Here, histograms and similar plots visualize the macroscopic trends of density, distribution range, coverage, and bias between data points. 
Through this step, you can understand the overall distribution tendency of the dataset, and specify the various patterns related to this to ensure the essential evidence for data modeling and prediction strategies.","Tf9M3j6N1v":"In order to overall data before the dimension reduction, it analyzes the shape of the entire manifold based on the geometric characteristics of the data and the distance distribution of each data point.","Tx3W9j2P8m":"In the entire dataset that does not take into account the class, the highest density samples and the lowest samples are detected each, and each class repeats this task repeatedly. Based on this, you can effectively check the data corresponding to similar or overlapping data and overlapping.","Tk9R5x2B8m":"Level III diagnosis","Td7B4z9K3y":"Level III level III analyzes data using customized datas lens made in consideration of domain and characteristics of data. In this stage, the process of designing and learning of data lens processing, that is, a data -specific custom neural network, is combined. The feature of the level III data lens is that the measurement lens and the generated lens are processed into one pair, which can be used directly for future synthetic data. The measurement method is the same as level II.","Tm1S6b4F9p":"In addition to selecting the appropriate dataset to capture the complex characteristics of the large dataset, the data imaging is carried out after processing the lens to the dataset. In particular, in the case of level III, it is diagnosed with improvement such as synthetic data creation (data bulk up), redundant data removal (data diet), and reproduction data creation (data replica) for safe data distribution. If you need to improve, you will use your own lens to create synthetic data immediately based on imaging results.","Th2R4v7G9s":"Observation is a dimension of the feature vector output through the layer of the selected neural network model. 
This value is the result of the most appropriate optimization of the dimensions that do not lose the complexity and diversity of the dataset. Unlike Level II, the data-specific lens of Level III is processed in accordance with the characteristics of the target dataset, so it can exclude unnecessary elements (for example, backgrounds in images) and capture the target objects required for the task. Since feature vectors that do not reflect the inherent characteristics of the data are removed, the data can be observed in a much lower dimension than in Level II. With the Level III data lens, the minimum dimension of the feature vectors that reflect the inherent characteristics of the input dataset is called the intrinsic dimension. At Pebblous, we use our own patented technology to derive the intrinsic dimension rigorously.","Tc8Z1p6E3w":"Select the DataLens type by comprehensively considering the purpose of learning, the application of the model, and the characteristics of the data. In particular, the method that best suits the structure and complexity of the dataset is applied.","Ty9X4k1M5j":"In this step, we visualize the geometric properties of the dataset obtained through the custom data lens to observe them in more depth. Compared with Level II, which uses existing neural networks, we examine more precisely the interrelationships between data points, spatial placements, and complex structures in the dataset. Through this step, you can establish a more advanced data analysis strategy by identifying geometric complexity and patterns due to the inherent attributes of the data.","Tb7H3v8L2q":"In this step, we visualize the distribution properties of the dataset obtained through the custom data lens to observe them in more depth. Similarly, we examine more precisely the density, dispersion, and distribution characteristics between data points that are difficult to identify in Level II. 
Through this step, you can accurately recognize the distribution complexity and diversity of the dataset to establish a more sophisticated data modeling strategy.","T76bd175ba":"Missing values","Td0a23975a":"Descriptive stats","T166bf2d7":"Level I Diagnosis","T6e83d3a7a-integrity-scan":"Integrity scan","Tiju4g2g9":"It is available from the Basic and above.","Tluhnk9ku":"File download failure","T1c08181d":"View dataset","T1ac62102":"Chart Explorer","T267a29ab":"Download Diagnostic Report","T92807e6a":"Ask Pebbly!","Tf25c6b45":"To Pebbly","T1a4a92f0":"Ask!","Tf85787fb":"In order to successfully carry out a customer project, we establish the foundation necessary for project execution, such as understanding the customer's business goals, deriving data quality and technical requirements, checking constraints, and domain knowledge, and set diagnostic items and timelines based on this.","Tf85787fbf":"Level III goes deepest. We design and train a domain-specific DataLens pair—one lens for measurement, one for generation. After training, we rerun the Level II geometry and distribution checks with task-specific metrics, and the paired lenses become a ready-made engine for future synthetic-data generation.","Tf85787fbb":"We start with a kickoff call to capture your business goals, data-quality and technical requirements, constraints, and domain context. These insights define the diagnostic checklist and timeline.","T61386b85":"Level I diagnosis is the most basic EDA stage. At this stage, Measuring data integrity, Missing value measurement, Class balance measurement, Statistical measurement are performed. This stage corresponds to the preliminary work required for the subsequent Level II and III.","T61386b85b":"Level I is a quick exploratory scan. We verify data integrity, count missing values, measure class balance, and compute key statistics—laying the groundwork for the deeper Level II and Level III analyses to follow.","T0098815b-level-ii-diagnosis":"Level II dives deeper. 
We apply our pre-trained DataLens to embed the dataset, extract key features, and build an observation space. Cluster geometry, inter-sample distance, and global distribution are then inspected to reveal hidden structure, overlaps, and outliers.","Tff901769":"Level III analyzes data using a custom data lens that is designed considering the domain and characteristics of the data. This step includes processing the data lens, that is, designing and training a data-specific custom neural network. The characteristic of the Level III data lens is that the measurement lens and the generation lens are processed as a pair, so they can be used directly for future synthetic data generation. The measurement method is the same as Level II.","T7ae89521":"We evaluate data quality and suggest directions for improvement by synthesizing the results of level I, II, and III diagnostics.","T7ae89521b":"We merge the insights from Levels I–III into one clear scorecard, rate overall data health, and outline a prioritized action plan to raise quality.","Tc9cc1817":"For more information, please visit the Diagnostic Report Guide","T6f0b8f93":"We'll guide you through the diagnostic process.","Te70f0f28":"You must log in to view the diagnosis.","Tkt2lflqc":"Think of it as a full-service hospital for your data: we diagnose quality issues and prescribe targeted fixes so your AI trains on healthy, reliable datasets.","Troauux6x":"Think of it as a full-service hospital for your data: we diagnose quality issues and prescribe targeted fixes so your AI trains on healthy, reliable datasets.","T4alic3o6":"How Does Data Clinic Work?","Tubolng68":"Our engine blends DataLens embeddings, visual diagnostics, quality scoring, and Data Bulk-Up & Data Diet routines to analyze and optimize your dataset from every angle.","Tm3vllr37":"Our engine blends DataLens embeddings, visual diagnostics, quality scoring, and Data Bulk-Up & Data Diet routines to analyze and optimize your dataset from every 
angle.","Thcndetz7":"Simple Diagnostic Report","Tjkq6qk7n":"You’ll receive a clear, easy-to-read summary that: (1) Scores problems like missing data, bias, and noise, (2) Lists the best fixes first—based on biggest payoff and least work needed.","T9wii7ib5":"You’ll receive a clear, easy-to-read summary that: (1) Scores problems like missing data, bias, and noise, (2) Lists the best fixes first—based on biggest payoff and least work needed.","Tgkw76ywi":"See a Sample Report","Ts7gijq18":"Preview the depth of our analysis with public-dataset reports from AI Hub (Korea), Kaggle, and Hugging Face.","T87miggih":"Preview the depth of our analysis with public-dataset reports from AI Hub (Korea), Kaggle, and Hugging Face.","T6e3c392b":"Want to Know More?","Tfoq12zlw":"The dataset name is required.","T534f32c5":"Enter a name for the dataset you’d like us to diagnose","T6qgqbhw1":"Example: AnimalFaceDataset_1","T59jdobpx":"Diagnosis is managed under the name you wrote. The name can be modified later on My Page.","Tc3aba47d":"Try core features on Pebblous demo datasets.","Td0f298d9":"Run basic quality checks on public datasets you select.","T7e6db34e":"200,000 diagnostic credits provided!","T51a7f53f":"Full Data Clinic analysis on your own data","Tcd437b86-100-000-diagnostic-credits":"100,000 diagnostic credits","Tcd437b86-2-million-diagnostic-credits":"2 million diagnostic credits","T3c9f5771a":"Large-scale diagnostics plus Data Diet & Bulk-Up services","T27ab096a":"Customer Data Diagnosis — available from Pro plan","T0db1e2a5-need-a-review-for-text-multimodal-or-structured-data":"Need text, multimodal, or structured data? 
→ Contact us for a custom quote","T0db1e2a5-couple":"Choose the plan that fits your dataset size and depth of analysis.","T93d2220c":"View Plan Details","T23735df5":"How to pay for your plan","Td479b709":"Radio spot","T24d19464":"Discounted price!","T0b10a364":"Your","T1245b172":"Here’s your diagnostic-credit balance:","T1245b172-here-s-your-diagnostic-credit-balance":"Here’s your diagnostic-credit status:","T3121ae75":"Add Credits","T561e080a":"Continue","T200e6b63":"On-Demand AI Data Check-up","T055d28b3":"Request Diagnosis","Td806eacf":"See Inside Your Dataset","T2fc9522a":"Pinpoint gaps, noise & bias—fast","Tf85787fba":"Customer Discovery & Alignment — We start by clarifying your business objectives, data constraints, and domain specifics. These inputs define the diagnostic checklist and timeline.","T7ae89521a":"Synthesizing Multi-Level Insights — Findings from Levels I, II, and III are merged into one scorecard, highlighting gaps, risks, and recommended next steps.","T61386b85a":"Data Integrity & Basic EDA — Checks schema consistency, missing values, class balance, and summary statistics—laying the groundwork for deeper analysis.","T0bf08c11a":"General-Purpose DataLens Analysis — Uses our pre-trained “DataLens” to embed your dataset, then examines manifold shape, cluster geometry, and overall distribution to uncover hidden structure and outliers.","Taeb3cb55":"Custom DataLens & Synthetic-Ready Assessment — Builds a domain-tuned measurement lens paired with a generative lens. 
Repeats Level II analyses while preparing the foundation for future synthetic-data generation.","Tae63c797":"Introducing the Diagnostic Procedure","T01d7e554":"Data Upload Guide","T761ccf7d":"Diagnosable image extensions are jpg, png, jpeg.","T074fd90b":"Login required","T41fb977f-review-and-confirm-before-submitting-your-diagnosis-request":"Review and confirm before submitting your diagnosis request.","Tf2961ffd":"Diagnostic dataset name","T57d44562":"Edit","T90c2d566":"Diagnostic dataset upload file","Tb5b9c074-replace":"Replace","Tb4809b84-credits-for-this-run":"Credits for This Run","T85838040-credits-remaining":"Credits Remaining","T85838040-current-balance":"Current Balance","Tbe433920-start-diagnosis":"Start Diagnosis","T37327800":"Diagnosis Request Submitted!","T079c3a72":"Track its progress anytime on My Page.","Tf2961ffd-dataset-name":"Dataset Name","T7d4522e3-number-of-diagnostic-data":"Items to Diagnose ","T700713ba":"1000 images","T90c2d566-dataset-file":"Dataset File","T90c2d566-diagnostic-dataset-upload-file":"Uploaded File","Tbfd7a61a-status":"Status","T6ce6be8e":"At Pebblous","Te246f596":"Preparing data diagnostics","Tb4a89398":"no see.","T7a304dca-go-to-my-page":"View Progress on My Page ","T7a304dcb":"Preparing data diagnosis in Pebblous.","T7a304dcc":"Data diagnosis has been completed in Pebblous.","T7a304dcd":"The data diagnosis report has been completed in Pebblous.","T823af691":"Use advanced CG software and rendering technology to create visually realistic images or animations.","T94e2b5c7":"Using a deep learning model, it automatically generates new data that mimics the real world data patterns.","T67d3a845":"After creating a virtual environment based on scenarios and equations that express the real world, we create data by modeling and simulating the process and behavior of the real world.","Td5b62f50":"Generate precision targeting\nsynthetic data based\non quality diagnosis","T66c00f78":"Single image 3D creation","T92c7d053":"Single photo 
-based 3D model generation synthetic data","Td0cf5027":"Drone Detection","T3e5b6f12":"* POSTECH joint research, achievement of SOTA levels","T0481f6e4":"Characters and poses in special environments","Ta552a51b":"Robot autonomous driving field","T3274f99c":"animal behavior","T1c707c2a":"Livestock sector","T74a8b2c9":"3D synthesis data for drone awareness and tracking","T86cacab6":"Diet monitoring","T1d3a5c6e":"Synthesis data for AI learning for figures and attitude recognition in a special environment, such as children's perception within the vehicle","T7b8214cf":"Logistics field","T45c7d8f9":"This is a dataset required for robots such as vacuum cleaners, and AI detects objects and plans optimal driving paths.","T16cba1f9":"Pharmaceutical field","Tb1a2e3c4":"Synthesis data for AI learning to classify mouse behavior and recognize body parts","T3b595aec":"Waste Plastic Recycling Classification AI Synthetic Data","T9f8e7d6c":"After creating a stable diffusion -based LORA model for analysis of small behavioral forms","T5a4b3c2d":"Synthetic data for meals for monitoring. Use hybrids of 3D and generated models","T1c2d3e4f":"This is a dataset for automatically recognizing the inventory of the stand in an environment such as an unmanned shop.","T8a7b6c5d":"In the automatic pill automatic preparation system of large hospitals, a computer vision inspection requires a pill dataset of various forms and configuration. Synthesis data for the detection and coefficient of the pill","T4e5f6a7b":"Synthetic data for AI learning used to recycle resources through plastic material classification","Tc0908391":"Synthetic Dataset: Case Study","Taf263dcc":"Download sample images","Td74fecc0":"Another unique strength of Pebblous is\nits high-quality synthetic data.\nWhen data is scarce or expensive to\nobtain, using synthetic data to\ntrain AI is the most powerful\nand efficient approach.","T186eb858":"Christmas Capsules, 2019. 
Digital media, Lee Ju-haeng","T547150129-my-page-free-interactive-web-diagnostic-report-description":"interactive view (public data)","T1dd5d946c":"You can rename the project later in My Page.","T1dd5d949-diagnostics-for":"Diagnostics for","T1dd5d949-hi":"Hi,","T1dd5d950":"Pre-trained","T1dd5d951":"name (no space: first name+last name)","footer":{"about":"About Us","case":"Use Cases","sample":"Sample Report","guide":"Report Guide","request":"Request Diagnosis","blog":"Blog","contact":"Contact Us","terms":"Terms of Service","privacy":"Privacy Policy","bizName":"Company: Pebblous Inc.","bizCeo":"CEO: Joohaeng Lee","bizRegNo":"Business Reg. No.: 584-86-02422","bizAddress":"Address: 507, 99 Daehak-ro, Yuseong-gu, Daejeon, South Korea","bizPhone":"Customer Service: +82 044-589-3824","bizCopyright":"© Pebblous Inc. All rights reserved.","bizHosting":"Hosting Provider: Pebblous Inc."},"meta":{"case":{"title":"Data Quality Management Best Practices: AI Case Studies | Data Clinic","description":"Learn data quality management best practices through our case studies. See how 5% synthetic data optimizes model performance & enterprise AI ROI.","keywords":"data quality management best practices"},"about":{"title":"Data-Centric AI Platform for Enterprise Excellence | Data Clinic","description":"Drive AI performance with our Data-Centric AI platform. We provide precision synthetic data and diagnostics to break through model limitations.","keywords":"data centric ai platform"},"skill":{"title":"Mastering Data Readiness for AI with Data Clinic","description":"Master your data readiness for AI. We provide advanced diagnostics and treatment to ensure high-quality, reliable data for your enterprise models.","keywords":"data readiness for ai"},"price":{"title":"Data Quality Management Software Pricing & Plans | Data Clinic","description":"Find the right plan for your team. 
Transparent pricing for our data quality management software, from AI diagnostics to synthetic data generation.","keywords":"Data Quality Management Software"},"report":{"title":"Diagnosis Reports","description":"Browse AI data quality diagnosis reports."},"reportGuide":{"title":"Guide to Data Quality Management Service & Diagnostic Reports","description":"Explore our data quality management service. We transform complex AI data issues into intuitive, easy-to-understand diagnostic reports.","keywords":"Data Quality Management Service"},"reportSample":{"title":"Data Quality Assessment Report Samples | Data Clinic","description":"Check out our data quality assessment report samples. We provide precision analysis for AI datasets in defense, manufacturing, and automotive sectors.","keywords":"data quality assessment report"},"contactUs":{"title":"Contact Us","description":"Contact us about Data Clinic services."},"pebblous":{"title":"Pebblous is a comprehensive data hospital.","description":"Pebblous is a deep-tech leader founded by AI and data experts. We provide world-class data management technology and professional data diagnostics.","keywords":"Pebblous"},"syntheticData":{"title":"Synthetic Data Generation Tool for AI Edge Cases | Data Clinic","description":"Solve AI Edge Cases with our synthetic data generation tool. We bridge the data gap with precision-driven, targeted datasets.","keywords":"synthetic data generation tool"},"request":{"title":"Advanced Data Diagnostics for AI Integrity | Data Clinic","description":"Start your data diagnostics with Data Clinic. We provide advanced verification for enterprise AI to ensure flawless, high-quality datasets.","keywords":"data diagnostics"},"dataSet":{"title":"High-Precision AI Training Datasets | Data Clinic","description":"Get specialized AI training data for Defense, Public, and Manufacturing sectors. 
High-quality datasets optimized for complex environments by Data Clinic.","keywords":"AI Training Datasets"},"home":{"title":"AI Readiness Assessment & Synthetic Data Solutions | Data Clinic","description":"Break through AI performance barriers. We provide AI readiness assessment services and synthetic data solutions driven by Data-Centric AI philosophy.","keywords":"ai readiness assessment services"},"comingSoon":{"title":"Coming Soon | Data Clinic"},"terms":{"title":"Terms of Service","description":"Data Clinic Terms of Service."},"privacy":{"title":"Privacy Policy","description":"Data Clinic Privacy Policy."}}},"children":["$","$L14",null,{"defaultSettings":{"themeMode":"light","themeDirection":"ltr","themeContrast":"default","themeLayout":"vertical","themeColorPresets":"default","themeStretch":false},"children":["$","$L15",null,{"children":["$","$L16",null,{"children":["$","$L17",null,{"children":["$","$L18",null,{"children":[["$","$L19",null,{}],["$","$L1a",null,{}],["$","$L6",null,{"parallelRouterKey":"children","segmentPath":["children","$7","children"],"error":"$1b","errorStyles":[],"errorScripts":[],"template":["$","$L9",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":["$","$L1c",null,{}],"notFoundStyles":[]}]]}]}]}]}]}]}]