9595Name: "Jane Smith", Email: "[email protected] " 9696```
9797
98+ ## Advanced Examples
99+
100+ ### Complex Data Preprocessing and Validation
101+
102+ This example demonstrates the full power of fileprep: combining multiple preprocessors and validators to clean and validate real-world messy data.
103+
104+ ``` go
105+ package main
106+
107+ import (
108+ "fmt"
109+ "strings"
110+
111+ "github.com/nao1215/fileprep"
112+ )
113+
114+ // Employee represents employee data with comprehensive preprocessing and validation
115+ type Employee struct {
116+ // ID: pad to 6 digits, must be numeric
117+ EmployeeID string ` name:"id" prep:"trim,pad_left=6:0" validate:"required,numeric,len=6"`
118+
119+ // Name: clean whitespace, required alphabetic with spaces
120+ FullName string ` name:"name" prep:"trim,collapse_space" validate:"required,alphaspace"`
121+
122+ // Email: normalize to lowercase, validate format
123+ Email string ` prep:"trim,lowercase" validate:"required,email"`
124+
125+ // Department: normalize case, must be one of allowed values
126+ Department string ` prep:"trim,uppercase" validate:"required,oneof=ENGINEERING SALES MARKETING HR"`
127+
128+ // Salary: keep only digits, validate range
129+ Salary string ` prep:"trim,keep_digits" validate:"required,numeric,gte=30000,lte=500000"`
130+
131+ // Phone: extract digits, validate E.164 format after adding country code
132+ Phone string ` prep:"trim,keep_digits,prefix=+1" validate:"e164"`
133+
134+ // Start date: validate datetime format
135+ StartDate string ` name:"start_date" prep:"trim" validate:"required,datetime=2006-01-02"`
136+
137+ // Manager ID: required only if department is not HR
138+ ManagerID string ` name:"manager_id" prep:"trim,pad_left=6:0" validate:"required_unless=Department HR"`
139+
140+ // Website: fix missing scheme, validate URL
141+ Website string ` prep:"trim,lowercase,fix_scheme=https" validate:"url"`
142+ }
143+
144+ func main () {
145+ // Messy real-world CSV data
146+ csvData := ` id,name,email,department,salary,phone,start_date,manager_id,website
147+ 42, John Doe ,[email protected] ,engineering,"$75,000",555-123-4567,2023-01-15,000001,company.com/john
148+ 7,Jane Smith,[email protected] , Sales ,"$120,000",(555) 987-6543,2022-06-01,000002,WWW.LINKEDIN.COM/in/jane
149+ 123,Bob Wilson,[email protected] ,HR,45000,555.111.2222,2024-03-20,,
150+ 99,Alice Brown,[email protected] ,Marketing,$88500,555-444-3333,2023-09-10,000003,https://alice.dev
151+ `
152+
153+ processor := fileprep.NewProcessor (fileprep.FileTypeCSV )
154+ var employees []Employee
155+
156+ _ , result , err := processor.Process (strings.NewReader (csvData), &employees)
157+ if err != nil {
158+ fmt.Printf (" Fatal error: %v \n " , err)
159+ return
160+ }
161+
162+ fmt.Printf (" === Processing Result ===\n " )
163+ fmt.Printf (" Total rows: %d , Valid rows: %d \n\n " , result.RowCount , result.ValidRowCount )
164+
165+ for i , emp := range employees {
166+ fmt.Printf (" Employee %d :\n " , i+1 )
167+ fmt.Printf (" ID: %s \n " , emp.EmployeeID )
168+ fmt.Printf (" Name: %s \n " , emp.FullName )
169+ fmt.Printf (" Email: %s \n " , emp.Email )
170+ fmt.Printf (" Department: %s \n " , emp.Department )
171+ fmt.Printf (" Salary: %s \n " , emp.Salary )
172+ fmt.Printf (" Phone: %s \n " , emp.Phone )
173+ fmt.Printf (" Start Date: %s \n " , emp.StartDate )
174+ fmt.Printf (" Manager ID: %s \n " , emp.ManagerID )
175+ fmt.Printf (" Website: %s \n\n " , emp.Website )
176+ }
177+ }
178+ ```
179+
180+ Output:
181+ ```
182+ === Processing Result ===
183+ Total rows: 4, Valid rows: 4
184+
185+ Employee 1:
186+ ID: 000042
187+ Name: John Doe
188+ Email: [email protected]
189+ Department: ENGINEERING
190+ Salary: 75000
191+ Phone: +15551234567
192+ Start Date: 2023-01-15
193+ Manager ID: 000001
194+ Website: https://company.com/john
195+
196+ Employee 2:
197+ ID: 000007
198+ Name: Jane Smith
199+ Email: [email protected]
200+ Department: SALES
201+ Salary: 120000
202+ Phone: +15559876543
203+ Start Date: 2022-06-01
204+ Manager ID: 000002
205+ Website: https://www.linkedin.com/in/jane
206+
207+ Employee 3:
208+ ID: 000123
209+ Name: Bob Wilson
210+ Email: [email protected]
211+ Department: HR
212+ Salary: 45000
213+ Phone: +15551112222
214+ Start Date: 2024-03-20
215+ Manager ID: 000000
216+ Website:
217+
218+ Employee 4:
219+ ID: 000099
220+ Name: Alice Brown
221+ Email: [email protected]
222+ Department: MARKETING
223+ Salary: 88500
224+ Phone: +15554443333
225+ Start Date: 2023-09-10
226+ Manager ID: 000003
227+ Website: https://alice.dev
228+ ```
229+
230+
231+ ### Detailed Error Reporting
232+
233+ When validation fails, fileprep reports precise error information, including the row number, the column name, and the specific reason the validation failed.
234+
235+ ``` go
236+ package main
237+
238+ import (
239+ "fmt"
240+ "strings"
241+
242+ "github.com/nao1215/fileprep"
243+ )
244+
245+ // Order represents an order with strict validation rules
246+ type Order struct {
247+ OrderID string ` name:"order_id" validate:"required,uuid4"`
248+ CustomerID string ` name:"customer_id" validate:"required,numeric"`
249+ Email string ` validate:"required,email"`
250+ Amount string ` validate:"required,number,gt=0,lte=10000"`
251+ Currency string ` validate:"required,len=3,uppercase"`
252+ Country string ` validate:"required,alpha,len=2"`
253+ OrderDate string ` name:"order_date" validate:"required,datetime=2006-01-02"`
254+ ShipDate string ` name:"ship_date" validate:"datetime=2006-01-02,gtfield=OrderDate"`
255+ IPAddress string ` name:"ip_address" validate:"required,ip_addr"`
256+ PromoCode string ` name:"promo_code" validate:"alphanumeric"`
257+ Quantity string ` validate:"required,numeric,gte=1,lte=100"`
258+ UnitPrice string ` name:"unit_price" validate:"required,number,gt=0"`
259+ TotalCheck string ` name:"total_check" validate:"required,eqfield=Amount"`
260+ }
261+
262+ func main () {
263+ // CSV with multiple validation errors
264+ csvData := ` order_id,customer_id,email,amount,currency,country,order_date,ship_date,ip_address,promo_code,quantity,unit_price,total_check
265+ 550e8400-e29b-41d4-a716-446655440000,12345,[email protected],500.00,USD,US,2024-01-15,2024-01-20,192.168.1.1,SAVE10,2,250.00,500.00
266+ invalid-uuid,abc,not-an-email,-100,US,USA,2024/01/15,2024-01-10,999.999.999.999,PROMO-CODE-TOO-LONG!!,0,0,999
267+ 550e8400-e29b-41d4-a716-446655440001,,bob@test,50000,EURO,J1,not-a-date,,2001:db8::1,VALID20,101,-50,50000
268+ 123e4567-e89b-42d3-a456-426614174000,99999,[email protected],1500.50,JPY,JP,2024-02-28,2024-02-25,10.0.0.1,VIP,5,300.10,1500.50
269+ `
270+
271+ processor := fileprep.NewProcessor (fileprep.FileTypeCSV )
272+ var orders []Order
273+
274+ _ , result , err := processor.Process (strings.NewReader (csvData), &orders)
275+ if err != nil {
276+ fmt.Printf (" Fatal error: %v \n " , err)
277+ return
278+ }
279+
280+ fmt.Printf (" === Validation Report ===\n " )
281+ fmt.Printf (" Total rows: %d \n " , result.RowCount )
282+ fmt.Printf (" Valid rows: %d \n " , result.ValidRowCount )
283+ fmt.Printf (" Invalid rows: %d \n " , result.RowCount -result.ValidRowCount )
284+ fmt.Printf (" Total errors: %d \n\n " , len (result.ValidationErrors ()))
285+
286+ if result.HasErrors () {
287+ fmt.Println (" === Error Details ===" )
288+ for _ , e := range result.ValidationErrors () {
289+ fmt.Printf("Row %d, Column '%s': %s\n", e.Row, e.Column, e.Message)
290+ }
291+ }
292+ }
293+ ```
294+
295+ Output:
296+ ```
297+ === Validation Report ===
298+ Total rows: 4
299+ Valid rows: 1
300+ Invalid rows: 3
301+ Total errors: 23
302+
303+ === Error Details ===
304+ Row 2, Column 'order_id': value must be a valid UUID version 4
305+ Row 2, Column 'customer_id': value must be numeric
306+ Row 2, Column 'email': value must be a valid email address
307+ Row 2, Column 'amount': value must be greater than 0
308+ Row 2, Column 'currency': value must have exactly 3 characters
309+ Row 2, Column 'country': value must have exactly 2 characters
310+ Row 2, Column 'order_date': value must be a valid datetime in format: 2006-01-02
311+ Row 2, Column 'ip_address': value must be a valid IP address
312+ Row 2, Column 'promo_code': value must contain only alphanumeric characters
313+ Row 2, Column 'quantity': value must be greater than or equal to 1
314+ Row 2, Column 'unit_price': value must be greater than 0
315+ Row 2, Column 'ship_date': value must be greater than field OrderDate
316+ Row 2, Column 'total_check': value must equal field Amount
317+ Row 3, Column 'customer_id': value is required
318+ Row 3, Column 'email': value must be a valid email address
319+ Row 3, Column 'amount': value must be less than or equal to 10000
320+ Row 3, Column 'currency': value must have exactly 3 characters
321+ Row 3, Column 'country': value must contain only alphabetic characters
322+ Row 3, Column 'order_date': value must be a valid datetime in format: 2006-01-02
323+ Row 3, Column 'quantity': value must be less than or equal to 100
324+ Row 3, Column 'unit_price': value must be greater than 0
325+ Row 3, Column 'ship_date': value must be greater than field OrderDate
326+ Row 4, Column 'ship_date': value must be greater than field OrderDate
327+ ```
328+
98329## Preprocessing Tags (` prep ` )
99330
100331Multiple tags can be combined: ` prep:"trim,lowercase,default=N/A" `
@@ -225,7 +456,21 @@ Multiple tags can be combined: `validate:"required,email"`
225456| ` https_url ` | Valid HTTPS URL | ` validate:"https_url" ` |
226457| ` url_encoded ` | URL encoded string | ` validate:"url_encoded" ` |
227458| ` datauri ` | Valid data URI | ` validate:"datauri" ` |
228- | ` uuid ` | Valid UUID | ` validate:"uuid" ` |
459+ | ` datetime=layout ` | Valid datetime matching Go layout | ` validate:"datetime=2006-01-02" ` |
460+ | ` uuid ` | Valid UUID (any version) | ` validate:"uuid" ` |
461+ | ` uuid3 ` | Valid UUID version 3 | ` validate:"uuid3" ` |
462+ | ` uuid4 ` | Valid UUID version 4 | ` validate:"uuid4" ` |
463+ | ` uuid5 ` | Valid UUID version 5 | ` validate:"uuid5" ` |
464+ | ` ulid ` | Valid ULID | ` validate:"ulid" ` |
465+ | ` e164 ` | Valid E.164 phone number | ` validate:"e164" ` |
466+ | ` latitude ` | Valid latitude (-90 to 90) | ` validate:"latitude" ` |
467+ | ` longitude ` | Valid longitude (-180 to 180) | ` validate:"longitude" ` |
468+ | ` hexadecimal ` | Valid hexadecimal string | ` validate:"hexadecimal" ` |
469+ | ` hexcolor ` | Valid hex color code | ` validate:"hexcolor" ` |
470+ | ` rgb ` | Valid RGB color | ` validate:"rgb" ` |
471+ | ` rgba ` | Valid RGBA color | ` validate:"rgba" ` |
472+ | ` hsl ` | Valid HSL color | ` validate:"hsl" ` |
473+ | ` hsla ` | Valid HSLA color | ` validate:"hsla" ` |
229474
230475### Network Validators
231476
@@ -237,6 +482,7 @@ Multiple tags can be combined: `validate:"required,email"`
237482| ` cidr ` | Valid CIDR notation | ` validate:"cidr" ` |
238483| ` cidrv4 ` | Valid IPv4 CIDR | ` validate:"cidrv4" ` |
239484| ` cidrv6 ` | Valid IPv6 CIDR | ` validate:"cidrv6" ` |
485+ | ` mac ` | Valid MAC address | ` validate:"mac" ` |
240486| ` fqdn ` | Valid fully qualified domain name | ` validate:"fqdn" ` |
241487| ` hostname ` | Valid hostname (RFC 952) | ` validate:"hostname" ` |
242488| ` hostname_rfc1123 ` | Valid hostname (RFC 1123) | ` validate:"hostname_rfc1123" ` |
@@ -255,6 +501,15 @@ Multiple tags can be combined: `validate:"required,email"`
255501| ` fieldcontains=Field ` | Value contains another field's value | ` validate:"fieldcontains=Keyword" ` |
256502| ` fieldexcludes=Field ` | Value excludes another field's value | ` validate:"fieldexcludes=Forbidden" ` |
257503
504+ ### Conditional Required Validators
505+
506+ | Tag | Description | Example |
507+ | -----| -------------| ---------|
508+ | ` required_if=Field value ` | Required if field equals value | ` validate:"required_if=Status active" ` |
509+ | ` required_unless=Field value ` | Required unless field equals value | ` validate:"required_unless=Type guest" ` |
510+ | ` required_with=Field ` | Required if field is present | ` validate:"required_with=Email" ` |
511+ | ` required_without=Field ` | Required if field is absent | ` validate:"required_without=Phone" ` |
512+
258513## Supported File Formats
259514
260515| Format | Extension | Compressed |
0 commit comments