# ******************************************************************************
# FILENAME      sgwire_isringhausen.py
#
# Version       7.0
#
# Encoding		UTF-8
#
# DESCRIPTION	Extract scheduled data from pdf:s to one calc (xlsx format)
#
# NOTES         Menu language - Swedish
#
# Compiler		python 3.10.12 running @ Ubuntu 22.04 LTS
#
# Copyright © 2023 All rights reserved Leif Krüger
#
#-----------------------------------------------------------------------------
# CHANGES
#-----------------------------------------------------------------------------
#   REF NO  VERSION	DATE        WHO DETAIL
# -----------------------------------------------------------------------------
# 	1	    1       2023-04-22	LK  Start date
# 	2	    2       2023-04-23	LK  Extract data
#   3       3       2023-04-23  LK  Save to xlsx
#   4       4       2023-04-23  LK  Read all pdf:s in a subfolder "pdf"
#   5       5       2023-04-25  LK  Include T-items (Tage) as well
#   6       6       2023-04-27  LK  Add alternating row color in xlsx
#   7       6.1     2023-04-29  LK  A script for both linux/windows
#   8       6.2     2023-04-30  LK  The same order has the same bg-color
#   9       6.3     2023-05-01  LK  Weekorder rows more compact in code
#  10       6.4     2023-05-01  LK  Handle T and W in same code
#  11       6.5     2023-05-07  LK  Move settings to sgwire_config.cfg
#  12       6.6     2023-06-25  LK  Make more clean code + result info
#  13       6.7     2023-08-01  LK  Bug fix: Add missing "Tage" starts with T3x
#  14       6.7     2023-08-01  LK  Remove obsolete text see "material_text21"
#  15       7.0     2023-08-02  LK  Autofit the worksheet (column width)
# ******************************************************************************

import PyPDF2
import xlsxwriter
import os
import glob
import configparser

################################################################################
# Read config from file sgwire_config.cfg
sgwire_config = configparser.RawConfigParser()
sgwire_config.read("./sgwire_config.cfg")

# Parameters
workbook_name = sgwire_config.get("sgwire-config", "workbook_name")
worksheet_name = sgwire_config.get("sgwire-config", "worksheet_name")
output_filename = workbook_name + ".xlsx"
output_file_path_windows = sgwire_config.get("sgwire-config", "output_file_path_windows")
output_file_path_linux = sgwire_config.get("sgwire-config", "output_file_path_linux")
path_pdf_windows = sgwire_config.get("sgwire-config", "path_pdf_windows")
path_pdf_linux = sgwire_config.get("sgwire-config", "path_pdf_linux")
output_striped = sgwire_config.get("sgwire-config", "output_striped")
striped_by = sgwire_config.get("sgwire-config", "striped_by")
cell_bg_header = sgwire_config.get("sgwire-config", "cell_bg_header")
cell_bg_row_even = sgwire_config.get("sgwire-config", "cell_bg_row_even")
cell_bg_row_odd = sgwire_config.get("sgwire-config", "cell_bg_row_odd")
info_text_started = sgwire_config.get("sgwire-config", "info_text_started")
info_text_finished = sgwire_config.get("sgwire-config", "info_text_finished")

# Script data
script_name = "sgwire_isringhausen.py"
copyright_year = "2023"
creator = "Leif Krüger"
creator_email = "leif@leifkruger.se"
script_version = "7.0"
last_updated = "2023-08-02"

# Clear screen 
def clearConsole():
    command = "clear"
    if os.name in ("nt", "dos"): # If Machine is running on Windows, use cls
        command = "cls"
    os.system(command)

clearConsole()

# Take care of path depending on os, Linux or Windows
if os.name in ("nt", "dos"):
    path_pdf = path_pdf_windows
    path_output = output_file_path_windows + output_filename
else:
    path_pdf = path_pdf_linux
    path_output = output_file_path_linux + output_filename

# Show start message
print(f"\n\n\n\n\t\tScriptnamn: {script_name}")
print(f"\n\n\n\n\t\tStatus: {info_text_started}")

# Create a workbook and add a worksheet
workbook = xlsxwriter.Workbook(path_output)
worksheet = workbook.add_worksheet(worksheet_name)

# Start from the first cell. Rows and columns are zero indexed
row = 0
col = 0

# If output striped: Cell background color
if output_striped == "yes":
    bg_format_header = workbook.add_format({'bg_color': cell_bg_header})
    bg_format1 = workbook.add_format({'bg_color': cell_bg_row_odd}) 
    bg_format2 = workbook.add_format({'bg_color': cell_bg_row_even}) 
else:
    # bg_format_header = workbook.add_format({'bg_color': cell_bg_header})
    bg_format1 = workbook.add_format({'bg_color': '#FFFFFA'})
    bg_format2 = workbook.add_format({'bg_color': '#FFFFFF'})

# Headers: attribut and backgroundcolor
worksheet.write(row, col, "Purchase Order No", bg_format_header)
worksheet.write(row, col + 1, "Customer Part No", bg_format_header)
worksheet.write(row, col + 2, "Customer Part Description", bg_format_header)
worksheet.write(row, col + 3, "Part Description En", bg_format_header)
worksheet.write(row, col + 4, "Part Description En2", bg_format_header)
worksheet.write(row, col + 5, "Day / Week", bg_format_header)
worksheet.write(row, col + 6, "Open QTY", bg_format_header)
worksheet.write(row, col + 7, "Change", bg_format_header)

# New row after headers in excel file
row += 1

# A list of all pdf files
pdf_files = glob.glob(path_pdf)

# Number of pdf files
numberof_pdf_files = len(pdf_files)

# Read all pdf files
for x_pdf in range(numberof_pdf_files):

    # Open PDF file in read mode
    pdf_file = open(pdf_files[x_pdf], 'rb')

    # A object of PyPDF2 for the PDF file
    pdf_reader = PyPDF2.PdfReader(pdf_file)

    # Variable for extracted text from pdf
    pdf_text = ""

    # Loop through every pdf page and extract text
    for page_num in range(len(pdf_reader.pages)):
        page = pdf_reader.pages[page_num]
        pdf_text += page.extract_text()

    # Close pdf file
    pdf_file.close()

    # Split a string into a list where each row is a list item
    text = pdf_text.split('\n')

    # Number of rows
    numberOfRows = (len(text))

    # Use a flag to avoid dublicates
    duplicate_flag = 0
    duplicate_flag2 = 0
    duplicate_flag3 = 0

    for x in range(numberOfRows):
        textrow = text[x]

        # Cell background color for each order/row by pdf
        if striped_by == "order":
            if x_pdf % 2 == 0:
                bgcell = bg_format1 # Odd
            else:
                bgcell = bg_format2 # Even
        elif striped_by == "row":
            if row % 2 == 0:
                bgcell = bg_format1 # Odd
            else:
                bgcell = bg_format2 # Even
        else:
            bgcell = bg_format1        
        
        if "Lieferplannummer/Datum" in textrow:
            if duplicate_flag == 0:
                y = x + 1
                lieferplannummer_date = text[y]
                lieferplannummer = lieferplannummer_date[0:11]
                duplicate_flag = 1        
        
        if "Material " in textrow:
            material = text[x].replace("Material ", "")
            meterial_no = material[0:14]
            meterial_description = material[14:64]
            material_text20 = text[20]
            material_text21 = text[21].replace("______________________", "")
            material_text21 = material_text21.replace("ISRINGHAUSEN GmbH & Co. KG , 32655 Lemgo", "")

        # Take care of "T DD.MM.YY" after text "Liefertermin"
        if "Liefertermin" in textrow:
            duplicate_flag2 = 1

        # Take care of "T DD.MM.YY" before text "Die bestellen Artikel"
        if "Die bestellen Artikel" in textrow:
            duplicate_flag3 = 1
            
        # Take care of orderrow items "T" (Tage) and items "W" (Woche)
        if ((("T0" in textrow or "T1" in textrow or "T2" in textrow or "T3" in textrow) and (duplicate_flag2 == 1 and duplicate_flag3 == 0)) or ("W0" in textrow or "W1" in textrow or "W2" in textrow or "W3" in textrow or "W4" in textrow or "W5" in textrow)):
            tw_textrow = text[x]
            worksheet.write(row, col, lieferplannummer, bgcell)
            worksheet.write(row, col + 1, meterial_no, bgcell)
            worksheet.write(row, col + 2, meterial_description, bgcell)
            worksheet.write(row, col + 3, material_text20, bgcell)
            worksheet.write(row, col + 4, material_text21, bgcell)
            worksheet.write(row, col + 5, tw_textrow[0:12], bgcell)
            worksheet.write(row, col + 6, tw_textrow[23:35], bgcell)
            worksheet.write(row, col + 7, tw_textrow[41:52], bgcell)
            row += 1

# Autofit the worksheet
worksheet.autofit()

workbook.close()

clearConsole()

print(f"\n\n\n\n\t\tStatus: {info_text_finished}")
print(f"\n\t\tStatistik: {numberof_pdf_files} st pdf:er genomsökta") 
print(f"\n\n\n\t\tSe resultat i fil: \n\n\t\t{path_output}")
print("\n\n\n\n\n\t\t--- About the script -------------------------------------")
print(f"\n\t\tScriptname: {script_name} \n\n\t\tScriptversion: {script_version}")
print(f"\n\t\tLast updated: {last_updated}")
print(f"\n\t\tCreator: {creator}")
print(f"\n\t\tContact email: {creator_email}")
print(f"\n\n\t\tCopyright © {copyright_year} All Rights Reserved {creator}\n\n\n")