Advanced Usage

PostgreSQL Support

The GS1 GPC tool supports both SQLite and PostgreSQL databases. To use PostgreSQL, you need to install the PostgreSQL extra:

pip install gs1_gpc[postgresql]

Then you can use the --db-type postgresql option with the gpc import-gpc command:

gpc import-gpc --db-type postgresql --db-file "postgresql://user:password@localhost/dbname"

The connection string format is shown below; the `:port` part is optional and may be omitted, as in the example above:

postgresql://username:password@hostname:port/database

Custom XML Files

You can use your own XML files instead of downloading them from the GS1 API:

gpc import-gpc --xml-file ./my_custom_file.xml

The XML file must follow the GS1 GPC XML format with the following structure:

<schema>
  <segment code="10000000" text="Segment Description">
    <family code="10100000" text="Family Description">
      <class code="10100100" text="Class Description">
        <brick code="10100101" text="Brick Description">
          <attType code="20000001" text="Attribute Type Description">
            <attValue code="30000001" text="Attribute Value Description" />
            <attValue code="30000002" text="Attribute Value Description" />
          </attType>
        </brick>
      </class>
    </family>
  </segment>
</schema>

Logging

You can control the logging level with the --verbose and --quiet options:

# Enable detailed debug logging
gpc import-gpc --verbose

# Suppress all logging except errors
gpc import-gpc --quiet

Programmatic Usage

You can use the GS1 GPC tool as a Python library in your own code using the class-based API:

from gs1_gpc.db import DatabaseConnection, setup_database
from gs1_gpc.parser import GPCParser
from gs1_gpc.downloader import GPCDownloader
from gs1_gpc.exporter import GPCExporter

# Create a downloader instance
downloader = GPCDownloader(download_dir="/path/to/downloads", language_code="en")

# Download the latest GPC data; a falsy result signals that the download failed
xml_file = downloader.download_latest_gpc_xml()
if not xml_file:
    raise SystemExit("Failed to download GPC data")

# Create database connection
db_connection = DatabaseConnection('my_database.sqlite3')

try:
    # Setup database; a falsy result signals failure
    if not setup_database(db_connection):
        raise SystemExit("Failed to setup database")

    # Create parser and process XML file
    parser = GPCParser(db_connection)
    parser.process_xml(xml_file)
finally:
    # Close database connection, even if setup or parsing failed
    db_connection.close()

# Export database to SQL
exporter = GPCExporter(export_dir="/path/to/exports", language_code="en")
exporter.dump_database_to_sql('my_database.sqlite3')

Using Models and Callbacks

You can use the models and callbacks to process GPC data in a more structured way:

from gs1_gpc.db import DatabaseConnection, setup_database
from gs1_gpc.parser import GPCParser
from gs1_gpc.models import GPCModels
from gs1_gpc.callbacks import GPCProcessedCallback

# Custom callback implementation
class MyCallback(GPCProcessedCallback):
    """Example callback that reports processing progress on standard output."""

    def on_brick_processed(self, brick_code, brick_desc, class_code, is_new):
        """Print the code and description of the brick passed in."""
        message = f"Processed brick: {brick_code} - {brick_desc}"
        print(message)

    def on_processing_complete(self, counters):
        """Print the brick total read from ``counters['bricks_processed']``."""
        brick_total = counters['bricks_processed']
        print(f"Processing complete. Processed {brick_total} bricks.")

# Create database connection
db_connection = DatabaseConnection('my_database.sqlite3')

try:
    setup_database(db_connection)

    # Create parser with callback and process XML file
    callback = MyCallback()
    parser = GPCParser(db_connection, callback=callback)
    parser.process_xml('gpc_data.xml')
finally:
    # Close database connection, even if setup or parsing failed
    db_connection.close()

Food Segment Example

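The package includes an advanced example that demonstrates how to import only the Food/Beverage segment (code 50000000). It first writes a filtered copy of the XML file that contains only that segment, then imports the filtered file: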

  1#!/usr/bin/env python3
  2"""
  3Advanced example script demonstrating how to import only the Food/Beverage segment (50000000)
  4from GS1 GPC data.
  5"""
  6
  7import os
  8import logging
  9import xml.etree.ElementTree as ET
 10from gs1_gpc.db import DatabaseConnection, setup_database
 11from gs1_gpc.parser import GPCParser
 12from gs1_gpc.downloader import GPCDownloader
 13from gs1_gpc.callbacks import GPCProcessedCallback
 14
 15# Configure logging
 16logging.basicConfig(
 17    level=logging.INFO,
 18    format='%(asctime)s - %(levelname)s - %(message)s'
 19)
 20
 21# Base directory: the parent of the directory that contains this script
 22SCRIPT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 23
 24# Define paths
 25GPC_DOWNLOAD_DIR = os.path.join(SCRIPT_DIR, 'data', 'imports')
 26DB_FILE = os.path.join(SCRIPT_DIR, 'data', 'instances', 'food_segment_import.sqlite3')
 27
 28# Define the Food/Beverage segment code
 29FOOD_SEGMENT_CODE = "50000000"
 30
 31
 32class FoodSegmentFilter(GPCProcessedCallback):
 33    """Callback to filter and process only the Food/Beverage segment."""
 34    
 35    def __init__(self):
 36        self.stats = {
 37            'families': 0,
 38            'classes': 0,
 39            'bricks': 0,
 40            'attribute_types': 0,
 41            'attribute_values': 0
 42        }
 43    
 44    def on_segment_processed(self, segment_code, segment_desc, is_new):
 45        """Only allow the Food/Beverage segment to be processed."""
 46        return segment_code == FOOD_SEGMENT_CODE
 47    
 48    def on_family_processed(self, family_code, family_desc, segment_code, is_new):
 49        if segment_code == FOOD_SEGMENT_CODE:
 50            self.stats['families'] += 1
 51            logging.info(f"Processing Food Family: {family_desc} ({family_code})")
 52    
 53    def on_class_processed(self, class_code, class_desc, family_code, is_new):
 54        self.stats['classes'] += 1
 55    
 56    def on_brick_processed(self, brick_code, brick_desc, class_code, is_new):
 57        self.stats['bricks'] += 1
 58    
 59    def on_attribute_type_processed(self, att_type_code, att_type_text, brick_code, is_new):
 60        self.stats['attribute_types'] += 1
 61    
 62    def on_attribute_value_processed(self, att_value_code, att_value_text, att_type_code, is_new):
 63        self.stats['attribute_values'] += 1
 64    
 65    def on_processing_complete(self, counters):
 66        logging.info("=== Food Segment Import Statistics ===")
 67        logging.info(f"Families: {self.stats['families']}")
 68        logging.info(f"Classes: {self.stats['classes']}")
 69        logging.info(f"Bricks: {self.stats['bricks']}")
 70        logging.info(f"Attribute Types: {self.stats['attribute_types']}")
 71        logging.info(f"Attribute Values: {self.stats['attribute_values']}")
 72
 73
 74def filter_xml_for_food_segment(input_xml_path, output_xml_path):
 75    """
 76    Filter the XML file to include only the Food/Beverage segment.
 77    
 78    Args:
 79        input_xml_path: Path to the original XML file
 80        output_xml_path: Path to save the filtered XML file
 81    
 82    Returns:
 83        bool: True if successful, False otherwise
 84    """
 85    try:
 86        # Parse the XML file
 87        tree = ET.parse(input_xml_path)
 88        root = tree.getroot()
 89        
 90        # Find all segments
 91        segments = root.findall(".//segment")
 92        if not segments:
 93            logging.error("No segments found in the XML file")
 94            return False
 95        
 96        # Keep only the Food/Beverage segment
 97        segments_to_remove = []
 98        for segment in segments:
 99            if segment.get("code") != FOOD_SEGMENT_CODE:
100                segments_to_remove.append(segment)
101        
102        # Remove non-food segments
103        for segment in segments_to_remove:
104            root.remove(segment)
105        
106        # Save the filtered XML
107        tree.write(output_xml_path)
108        logging.info(f"Filtered XML saved to {output_xml_path}")
109        return True
110    
111    except Exception as e:
112        logging.error(f"Error filtering XML: {e}")
113        return False
114
115
def main():
    """Main function to demonstrate advanced import of Food/Beverage segment.

    Steps: locate a cached GPC XML file (downloading one if none is found),
    write a copy filtered down to the Food/Beverage segment next to it, set
    up the database at ``DB_FILE``, and run the parser over the filtered file
    with a ``FoodSegmentFilter`` callback. Each failure that is checked for
    is logged and ends the run early.
    """
    # Create a downloader instance
    downloader = GPCDownloader(download_dir=GPC_DOWNLOAD_DIR)

    # Find or download the latest XML file
    xml_file = downloader.find_latest_xml_file()
    if not xml_file:
        logging.info("No cached XML files found. Downloading latest...")
        xml_file = downloader.download_latest_gpc_xml()
        if not xml_file:
            logging.error("Failed to download GPC data")
            return

    # Create a filtered XML file with only the Food/Beverage segment
    filtered_xml_path = os.path.join(os.path.dirname(xml_file), "food_segment.xml")
    if not filter_xml_for_food_segment(xml_file, filtered_xml_path):
        logging.error("Failed to filter XML for Food/Beverage segment")
        return

    # Create database connection
    db_connection = DatabaseConnection(DB_FILE)
    try:
        # Setup database
        if not setup_database(db_connection):
            logging.error("Failed to setup database")
            return

        # Create callback filter
        food_filter = FoodSegmentFilter()

        # Create parser and process filtered XML file
        parser = GPCParser(db_connection, callback=food_filter)
        parser.process_xml(filtered_xml_path)
    finally:
        # Close database connection on every path, including the early
        # return above and any exception raised while parsing.
        db_connection.close()

    logging.info("Food/Beverage segment import completed successfully")
155
156
157if __name__ == "__main__":
158    main()