Advanced Usage
PostgreSQL Support
The GS1 GPC tool supports both SQLite and PostgreSQL databases. To use PostgreSQL, you need to install the PostgreSQL extra:
pip install gs1_gpc[postgresql]
Then you can use the --db-type postgresql option with the gpc import-gpc command:
gpc import-gpc --db-type postgresql --db-file "postgresql://user:password@localhost/dbname"
The connection string format is:
postgresql://username:password@hostname:port/database
Custom XML Files
You can use your own XML files instead of downloading them from the GS1 API:
gpc import-gpc --xml-file ./my_custom_file.xml
The XML file must follow the GS1 GPC XML format with the following structure:
<schema>
  <segment code="10000000" text="Segment Description">
    <family code="10100000" text="Family Description">
      <class code="10100100" text="Class Description">
        <brick code="10100101" text="Brick Description">
          <attType code="20000001" text="Attribute Type Description">
            <attValue code="30000001" text="Attribute Value Description" />
            <attValue code="30000002" text="Attribute Value Description" />
          </attType>
        </brick>
      </class>
    </family>
  </segment>
</schema>
Logging
You can control the logging level with the --verbose and --quiet options:
# Enable detailed debug logging
gpc import-gpc --verbose
# Suppress all logging except errors
gpc import-gpc --quiet
Programmatic Usage
You can use the GS1 GPC tool as a Python library in your own code using the class-based API:
from gs1_gpc.db import DatabaseConnection, setup_database
from gs1_gpc.parser import GPCParser
from gs1_gpc.downloader import GPCDownloader
from gs1_gpc.exporter import GPCExporter
# Create a downloader instance (download directory and language are configurable)
downloader = GPCDownloader(download_dir="/path/to/downloads", language_code="en")
# Download the latest GPC data; the returned value is passed to the parser below.
# NOTE: the Food Segment example on this page treats a falsy return value
# as a failed download, so production code should check it before continuing.
xml_file = downloader.download_latest_gpc_xml()
# Create database connection
db_connection = DatabaseConnection('my_database.sqlite3')
# Setup database (must run before the parser is used)
setup_database(db_connection)
# Create parser and process XML file
parser = GPCParser(db_connection)
parser.process_xml(xml_file)
# Close database connection before exporting the database file
db_connection.close()
# Export database to SQL (the exporter is given the database file path,
# not the connection closed above)
exporter = GPCExporter(export_dir="/path/to/exports", language_code="en")
exporter.dump_database_to_sql('my_database.sqlite3')
Using Models and Callbacks
You can use the models and callbacks to process GPC data in a more structured way:
from gs1_gpc.db import DatabaseConnection, setup_database
from gs1_gpc.parser import GPCParser
from gs1_gpc.models import GPCModels
from gs1_gpc.callbacks import GPCProcessedCallback
# Custom callback implementation
class MyCallback(GPCProcessedCallback):
    """Example callback that prints progress while the parser runs."""

    def on_brick_processed(self, brick_code, brick_desc, class_code, is_new):
        """Print the code and description of the brick that was just processed."""
        print(f"Processed brick: {brick_code} - {brick_desc}")

    def on_processing_complete(self, counters):
        """Print the brick total read from the ``bricks_processed`` counter."""
        print(f"Processing complete. Processed {counters['bricks_processed']} bricks.")
# Create database connection
db_connection = DatabaseConnection('my_database.sqlite3')
try:
    setup_database(db_connection)
    # Create parser with callback and process XML file
    callback = MyCallback()
    parser = GPCParser(db_connection, callback=callback)
    parser.process_xml('gpc_data.xml')
finally:
    # Close the connection even if setup or parsing raises
    db_connection.close()
Food Segment Example
The package includes an advanced example that demonstrates how to import only the Food/Beverage segment:
#!/usr/bin/env python3
"""
Advanced example script demonstrating how to import only the Food/Beverage segment (50000000)
from GS1 GPC data.
"""

import os
import logging
import xml.etree.ElementTree as ET

from gs1_gpc.db import DatabaseConnection, setup_database
from gs1_gpc.parser import GPCParser
from gs1_gpc.downloader import GPCDownloader
from gs1_gpc.callbacks import GPCProcessedCallback

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

# Base directory for the data paths below: the PARENT of the directory that
# contains this script (two dirname() calls on the script's absolute path).
SCRIPT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Define paths
GPC_DOWNLOAD_DIR = os.path.join(SCRIPT_DIR, 'data', 'imports')
DB_FILE = os.path.join(SCRIPT_DIR, 'data', 'instances', 'food_segment_import.sqlite3')

# Define the Food/Beverage segment code
FOOD_SEGMENT_CODE = "50000000"


class FoodSegmentFilter(GPCProcessedCallback):
    """Callback that tallies what the parser reports during the food import.

    Only the family hook receives a segment code, so only the family counter
    can check it. The remaining counters count every item the parser reports;
    they reflect the Food/Beverage segment only because ``main()`` feeds the
    parser an XML file that was already filtered down to that segment.
    """

    def __init__(self):
        # Let the base callback initialise any state of its own.
        super().__init__()
        self.stats = {
            'families': 0,
            'classes': 0,
            'bricks': 0,
            'attribute_types': 0,
            'attribute_values': 0,
        }

    def on_segment_processed(self, segment_code, segment_desc, is_new):
        """Return True only for the Food/Beverage segment.

        NOTE(review): whether GPCParser acts on this return value is not
        visible from this example -- confirm before relying on it.
        """
        return segment_code == FOOD_SEGMENT_CODE

    def on_family_processed(self, family_code, family_desc, segment_code, is_new):
        """Count and log families that belong to the Food/Beverage segment."""
        if segment_code == FOOD_SEGMENT_CODE:
            self.stats['families'] += 1
            logging.info(f"Processing Food Family: {family_desc} ({family_code})")

    def on_class_processed(self, class_code, class_desc, family_code, is_new):
        """Count a processed class."""
        self.stats['classes'] += 1

    def on_brick_processed(self, brick_code, brick_desc, class_code, is_new):
        """Count a processed brick."""
        self.stats['bricks'] += 1

    def on_attribute_type_processed(self, att_type_code, att_type_text, brick_code, is_new):
        """Count a processed attribute type."""
        self.stats['attribute_types'] += 1

    def on_attribute_value_processed(self, att_value_code, att_value_text, att_type_code, is_new):
        """Count a processed attribute value."""
        self.stats['attribute_values'] += 1

    def on_processing_complete(self, counters):
        """Log the totals collected by this callback (``counters`` is unused)."""
        logging.info("=== Food Segment Import Statistics ===")
        logging.info(f"Families: {self.stats['families']}")
        logging.info(f"Classes: {self.stats['classes']}")
        logging.info(f"Bricks: {self.stats['bricks']}")
        logging.info(f"Attribute Types: {self.stats['attribute_types']}")
        logging.info(f"Attribute Values: {self.stats['attribute_values']}")


def filter_xml_for_food_segment(input_xml_path, output_xml_path, segment_code=None):
    """
    Filter the XML file to include only one segment (Food/Beverage by default).

    Args:
        input_xml_path: Path to the original XML file
        output_xml_path: Path to save the filtered XML file
        segment_code: Code of the segment to keep; defaults to
            FOOD_SEGMENT_CODE when omitted

    Returns:
        bool: True if the filtered file was written, False if the input could
        not be read or parsed, contains no segments, does not contain the
        requested segment, or the output could not be written
    """
    if segment_code is None:
        # Resolved at call time so existing two-argument callers are unaffected.
        segment_code = FOOD_SEGMENT_CODE

    try:
        # Parse the XML file
        tree = ET.parse(input_xml_path)
        root = tree.getroot()

        # Pair every <segment> with its actual parent. Segments may sit below
        # the root (e.g. inside <schema>), and Element.remove() only works on
        # the direct parent, so removing from the root is not enough.
        located = [
            (parent, segment)
            for parent in root.iter()
            for segment in parent.findall("segment")
        ]
        if not located:
            logging.error("No segments found in the XML file")
            return False

        # Refuse to write an empty result when the wanted segment is missing.
        if not any(segment.get("code") == segment_code for _, segment in located):
            logging.error(f"Segment {segment_code} not found in the XML file")
            return False

        # Remove every other segment from its own parent
        for parent, segment in located:
            if segment.get("code") != segment_code:
                parent.remove(segment)

        # Save the filtered XML
        tree.write(output_xml_path)
        logging.info(f"Filtered XML saved to {output_xml_path}")
        return True

    except (ET.ParseError, OSError) as e:
        # Malformed XML, or the input/output file could not be read/written.
        logging.error(f"Error filtering XML: {e}")
        return False


def main():
    """Main function to demonstrate advanced import of Food/Beverage segment."""
    # Create a downloader instance
    downloader = GPCDownloader(download_dir=GPC_DOWNLOAD_DIR)

    # Find or download the latest XML file
    xml_file = downloader.find_latest_xml_file()
    if not xml_file:
        logging.info("No cached XML files found. Downloading latest...")
        xml_file = downloader.download_latest_gpc_xml()
        if not xml_file:
            logging.error("Failed to download GPC data")
            return

    # Create a filtered XML file with only the Food/Beverage segment,
    # stored next to the source XML file
    filtered_xml_path = os.path.join(os.path.dirname(xml_file), "food_segment.xml")
    if not filter_xml_for_food_segment(xml_file, filtered_xml_path):
        logging.error("Failed to filter XML for Food/Beverage segment")
        return

    # Create database connection
    db_connection = DatabaseConnection(DB_FILE)
    try:
        # Setup database
        if not setup_database(db_connection):
            logging.error("Failed to setup database")
            return

        # Create callback filter
        food_filter = FoodSegmentFilter()

        # Create parser and process filtered XML file
        parser = GPCParser(db_connection, callback=food_filter)
        parser.process_xml(filtered_xml_path)
    finally:
        # Close the connection on every path, including the setup-failure
        # return above and any exception raised while parsing
        db_connection.close()

    logging.info("Food/Beverage segment import completed successfully")


if __name__ == "__main__":
    main()