// Parses PayPal SOAP logs.
#include <getopt.h>

#include <algorithm>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <numeric>
#include <regex>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
13
// One output record: the fields pulled from a logged request plus the
// fields copied from the response at the same position in the log.
// Every value except the sequence number is kept as the raw text that was
// found in the XML; a field that was not found stays an empty string.
struct Transaction {
    int transNum = 0;        // 1-based sequence number; 0 until assigned
    std::string amount;      // text of <ebl:OrderTotal>
    std::string currency;    // value of the currencyID attribute
    std::string firstName;   // text of <ebl:FirstName>
    std::string lastName;    // text of <ebl:LastName>
    std::string street;      // text of <ebl:Street1>
    std::string city;        // text of <ebl:CityName>
    std::string state;       // text of <ebl:StateOrProvince>
    std::string zip;         // text of <ebl:PostalCode>
    std::string ccType;      // text of <ebl:CreditCardType>
    std::string ccLast4;     // text of <ebl:CreditCardLastFourDigits>
    std::string expMonth;    // text of <ebl:ExpMonth>
    std::string expYear;     // text of <ebl:ExpYear>
    std::string cvv;         // text of <ebl:CVV2>
    std::string transId;     // response: <TransactionID>
    std::string status;      // response: <Ack>
    std::string corrId;      // response: <CorrelationID>
    std::string procAmount;  // response: <Amount>
};
35
// Fields extracted from one logged response. Each member holds the raw
// element text, or an empty string when the element was not found.
struct Response {
    std::string transId;     // text of <TransactionID>
    std::string status;      // text of <Ack>
    std::string corrId;      // text of <CorrelationID>
    std::string procAmount;  // text of <Amount>
};
43
44// Function prototypes
45void showHelp(const char* programName);
46void generateBashCompletion();
47void generateZshCompletion();
48void generateFishCompletion();
49void generateManPage();
50std::string extractXmlValue(const std::string& xml, const std::string& tag);
51std::string extractXmlAttribute(const std::string& xml, const std::string& attribute);
52std::vector<std::string> extractRequests(const std::string& logContent);
53std::vector<std::string> extractResponses(const std::string& logContent);
54std::vector<Response> parseResponses(const std::vector<std::string>& responseXmls);
55std::vector<Transaction> parseTransactions(const std::vector<std::string>& requestXmls, const std::vector<Response>& responses);
56void outputRawData(const std::vector<Transaction>& transactions);
57void outputSummary(const std::vector<Transaction>& transactions);
58
59int main(int argc, char* argv[]) {
60 // Default options
61 bool summaryOnly = false;
62 std::string logFile;
63
64 // Parse command line options
65 static struct option longOptions[] = {
66 {"help", no_argument, 0, 'h'},
67 {"summary", no_argument, 0, 's'},
68 {"raw", no_argument, 0, 'r'},
69 {"generate-bash-completion", no_argument, 0, 0},
70 {"generate-zsh-completion", no_argument, 0, 0},
71 {"generate-fish-completion", no_argument, 0, 0},
72 {"man", no_argument, 0, 0},
73 {0, 0, 0, 0}
74 };
75
76 int optionIndex = 0;
77 int opt;
78 while ((opt = getopt_long(argc, argv, "hsr", longOptions, &optionIndex)) != -1) {
79 switch (opt) {
80 case 0:
81 // Long options without short equivalents
82 if (strcmp(longOptions[optionIndex].name, "generate-bash-completion") == 0) {
83 generateBashCompletion();
84 return 0;
85 } else if (strcmp(longOptions[optionIndex].name, "generate-zsh-completion") == 0) {
86 generateZshCompletion();
87 return 0;
88 } else if (strcmp(longOptions[optionIndex].name, "generate-fish-completion") == 0) {
89 generateFishCompletion();
90 return 0;
91 } else if (strcmp(longOptions[optionIndex].name, "man") == 0) {
92 generateManPage();
93 return 0;
94 }
95 break;
96 case 'h':
97 showHelp(argv[0]);
98 return 0;
99 case 's':
100 summaryOnly = true;
101 break;
102 case 'r':
103 summaryOnly = false;
104 break;
105 case '?':
106 std::cerr << "Unknown option: " << static_cast<char>(optopt) << std::endl;
107 showHelp(argv[0]);
108 return 1;
109 default:
110 break;
111 }
112 }
113
114 // Get logfile name
115 if (optind < argc) {
116 logFile = argv[optind];
117 } else {
118 std::cerr << "Error: No logfile specified" << std::endl;
119 showHelp(argv[0]);
120 return 1;
121 }
122
123 // Check if file exists
124 std::ifstream file(logFile);
125 if (!file.is_open()) {
126 std::cerr << "Error: File '" << logFile << "' not found" << std::endl;
127 return 1;
128 }
129
130 // Read the entire file
131 std::string logContent((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
132 file.close();
133
134 // Extract requests and responses
135 std::vector<std::string> requestXmls = extractRequests(logContent);
136 std::vector<std::string> responseXmls = extractResponses(logContent);
137
138 // Parse responses
139 std::vector<Response> responses = parseResponses(responseXmls);
140
141 // Parse transactions
142 std::vector<Transaction> transactions = parseTransactions(requestXmls, responses);
143
144 // Output data
145 if (summaryOnly) {
146 outputSummary(transactions);
147 } else {
148 outputRawData(transactions);
149 }
150
151 return 0;
152}
153
// Writes the usage text to standard output.
//
// programName is inserted wherever the text shows a command line (the USAGE
// line and every example).
void showHelp(const char* programName) {
    // Each example is a caption line followed by a command line made of the
    // program name plus these arguments.
    struct Example {
        const char* caption;
        const char* arguments;
    };
    static const Example kExamples[] = {
        {"Get all transactions", " payments.log"},
        {"Get only successful transactions", " payments.log | grep Success"},
        {"Count transactions by state", " payments.log | cut -d'|' -f8 | sort | uniq -c | sort -nr"},
        {"Find largest transaction", " payments.log | sort -t'|' -k2 -nr | head -1"},
        {"Get transactions over $500", " payments.log | awk -F'|' '$2 > 500'"},
        {"Summary stats", " -s payments.log"},
    };

    std::ostream& out = std::cout;

    out << "PayPal SOAP Log Parser\n\n"
           "USAGE:\n"
           " "
        << programName << " [OPTIONS] <logfile>\n\n";

    out << "OPTIONS:\n"
           " -h, --help Show this help message\n"
           " -s, --summary Show summary statistics only\n"
           " -r, --raw Output raw structured data (default)\n"
           " --generate-bash-completion Generate Bash completion script\n"
           " --generate-zsh-completion Generate Zsh completion script\n"
           " --generate-fish-completion Generate Fish completion script\n"
           " --man Generate man page\n\n"
           "OUTPUT FORMAT:\n"
           " TRANS_NUM|AMOUNT|CURRENCY|FIRSTNAME|LASTNAME|STREET|CITY|STATE|ZIP|CCTYPE|CCLAST4|EXPMONTH|EXPYEAR|CVV|TRANSID|STATUS|CORRID|PROC_AMOUNT\n\n"
           "FIELD DESCRIPTIONS:\n"
           " TRANS_NUM - Transaction sequence number\n"
           " AMOUNT - Order total amount\n"
           " CURRENCY - Currency code (USD, etc)\n"
           " FIRSTNAME - Customer first name\n"
           " LASTNAME - Customer last name\n"
           " STREET - Street address\n"
           " CITY - City name\n"
           " STATE - State/Province code\n"
           " ZIP - Postal code\n"
           " CCTYPE - Credit card type (Visa, MasterCard, etc)\n"
           " CCLAST4 - Last 4 digits of credit card\n"
           " EXPMONTH - Card expiration month\n"
           " EXPYEAR - Card expiration year\n"
           " CVV - CVV code\n"
           " TRANSID - PayPal transaction ID\n"
           " STATUS - Transaction status (Success/Failure)\n"
           " CORRID - Correlation ID\n"
           " PROC_AMOUNT - Actually processed amount\n\n"
           "EXAMPLES:\n";

    // Examples are separated by one blank line; none follows the last one.
    bool needSeparator = false;
    for (const Example& example : kExamples) {
        if (needSeparator) {
            out << "\n";
        }
        needSeparator = true;
        out << " # " << example.caption << "\n";
        out << " " << programName << example.arguments << "\n";
    }
}
201
// Writes a Bash completion script for the `soapdump` command to standard
// output. The script offers the long options when the current word starts
// with '-', and *.log files otherwise.
void generateBashCompletion() {
    // A custom raw-string delimiter keeps any `)"` in the script harmless.
    static const char kBashScript[] = R"BASH(
_soapdump_completions()
{
 local cur prev opts
 COMPREPLY=()
 cur="${COMP_WORDS[COMP_CWORD]}"
 prev="${COMP_WORDS[COMP_CWORD-1]}"
 opts="--help --summary --raw --generate-bash-completion --generate-zsh-completion --generate-fish-completion --man"

 if [[ ${cur} == -* ]] ; then
 COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
 return 0
 fi

 # Complete with log files if not an option
 if [[ ${cur} != -* ]]; then
 COMPREPLY=( $(compgen -f -X '!*.log' -- ${cur}) )
 return 0
 fi
}

complete -F _soapdump_completions soapdump
)BASH";

    std::cout << kBashScript << std::endl;
}
227
// Writes a Zsh completion definition for the `soapdump` command to standard
// output.
//
// The output must begin with the `#compdef` tag: zsh's compinit only looks
// at the first line of a completion file to decide which command it serves,
// so no blank line may precede it.
void generateZshCompletion() {
    std::cout << R"ZSH(#compdef soapdump

_arguments -s -S \
 '(-h --help)'{-h,--help}'[Show help message]' \
 '(-s --summary)'{-s,--summary}'[Show summary statistics only]' \
 '(-r --raw)'{-r,--raw}'[Output raw structured data (default)]' \
 '--generate-bash-completion[Generate Bash completion script]' \
 '--generate-zsh-completion[Generate Zsh completion script]' \
 '--generate-fish-completion[Generate Fish completion script]' \
 '--man[Generate man page]' \
 '*:log file:_files -g "*.log"'
)ZSH" << std::endl;
}
243
// Writes a Fish completion script for the `soapdump` command to standard
// output.
//
// The script text contains the sequence `)"` (in the --raw description), so
// it cannot live in a plain R"( ... )" literal: that sequence would end the
// literal early. A custom delimiter keeps the whole script in one string.
void generateFishCompletion() {
    std::cout << R"FISH(
function __fish_soapdump_no_subcommand
 set cmd (commandline -opc)
 if [ (count $cmd) -eq 1 ]
 return 0
 end
 return 1
end

complete -c soapdump -s h -l help -d "Show help message"
complete -c soapdump -s s -l summary -d "Show summary statistics only"
complete -c soapdump -s r -l raw -d "Output raw structured data (default)"
complete -c soapdump -l generate-bash-completion -d "Generate Bash completion script"
complete -c soapdump -l generate-zsh-completion -d "Generate Zsh completion script"
complete -c soapdump -l generate-fish-completion -d "Generate Fish completion script"
complete -c soapdump -l man -d "Generate man page"
complete -c soapdump -n "__fish_soapdump_no_subcommand" -a "*.log" -d "Log file"
)FISH" << std::endl;
}
264
// Writes the soapdump(1) manual page, in roff man-macro source form, to
// standard output.
void generateManPage() {
    // Kept in a named constant with a custom raw-string delimiter so the
    // roff escapes (\-, \fI, \(co) can be written exactly as they are emitted.
    static const char kManSource[] = R"MAN(.TH SOAPDUMP 1 "September 2025" "soapdump 0.1.0" "User Commands"
.SH NAME
soapdump \- PayPal SOAP log parser
.SH SYNOPSIS
.B soapdump
[\fIOPTIONS\fR] \fILOGFILE\fR
.SH DESCRIPTION
.B soapdump
is a high-performance PayPal SOAP log parser that extracts transaction data from log files and outputs it in a structured format.
.SH OPTIONS
.TP
.BR \-h ", " \-\-help
Show help message and exit.
.TP
.BR \-s ", " \-\-summary
Show summary statistics only.
.TP
.BR \-r ", " \-\-raw
Output raw structured data (default).
.TP
.BR \-\-generate-bash-completion
Generate Bash completion script.
.TP
.BR \-\-generate-zsh-completion
Generate Zsh completion script.
.TP
.BR \-\-generate-fish-completion
Generate Fish completion script.
.TP
.BR \-\-man
Generate this man page.
.SH OUTPUT FORMAT
The output is pipe-separated with the following fields:
.PP
TRANS_NUM|AMOUNT|CURRENCY|FIRSTNAME|LASTNAME|STREET|CITY|STATE|ZIP|CCTYPE|CCLAST4|EXPMONTH|EXPYEAR|CVV|TRANSID|STATUS|CORRID|PROC_AMOUNT
.SH EXAMPLES
.TP
Get all transactions:
.B soapdump payments.log
.TP
Get only successful transactions:
.B soapdump payments.log | grep Success
.TP
Count transactions by state:
.B soapdump payments.log | cut -d'|' -f8 | sort | uniq -c | sort -nr
.TP
Find largest transaction:
.B soapdump payments.log | sort -t'|' -k2 -nr | head -1
.TP
Get transactions over $500:
.B soapdump payments.log | awk -F'|' '$2 > 500'
.TP
Summary stats:
.B soapdump -s payments.log
.SH AUTHOR
Kieran Klukas <me@dunkirk.sh>
.SH COPYRIGHT
Copyright \(co 2025 Kieran Klukas. License: MIT.
)MAN";

    std::cout << kManSource << std::endl;
}
326
// Returns the text content of the first `<tag ...>text</tag>` element in
// `xml` whose content contains no child markup, or an empty string when
// there is none.
//
// `tag` is matched literally, so names containing characters such as '.' or
// '(' are safe. Matching rules:
//   - the opening tag is "<" + tag, followed by anything up to the next '>'
//     (this is where attributes go);
//   - the content runs up to the next '<';
//   - that '<' must start the exact closing tag "</" + tag + ">".
// An opening tag that fails the last rule (for example because the element
// has child elements) is skipped and the search continues after it.
std::string extractXmlValue(const std::string& xml, const std::string& tag) {
    const std::string opener = "<" + tag;
    const std::string closer = "</" + tag + ">";

    std::size_t searchFrom = 0;
    while (true) {
        const std::size_t openAt = xml.find(opener, searchFrom);
        if (openAt == std::string::npos) {
            return "";
        }

        // If no '>' or no '<' follows, no later candidate can match either,
        // because a later candidate would have to find them even further on.
        const std::size_t openEnd = xml.find('>', openAt + opener.size());
        if (openEnd == std::string::npos) {
            return "";
        }
        const std::size_t textBegin = openEnd + 1;
        const std::size_t textEnd = xml.find('<', textBegin);
        if (textEnd == std::string::npos) {
            return "";
        }

        if (xml.compare(textEnd, closer.size(), closer) == 0) {
            return xml.substr(textBegin, textEnd - textBegin);
        }
        searchFrom = openAt + 1;
    }
}
335
// Returns the value of the first `attribute="value"` occurrence anywhere in
// `xml`, or an empty string when the attribute is absent or its closing
// quote is missing.
//
// `attribute` is matched literally (no regex metacharacters). The match is
// a plain substring search, so it does not check which element the
// attribute belongs to, nor that the name starts on a word boundary.
std::string extractXmlAttribute(const std::string& xml, const std::string& attribute) {
    const std::string prefix = attribute + "=\"";

    const std::size_t prefixAt = xml.find(prefix);
    if (prefixAt == std::string::npos) {
        return "";
    }

    const std::size_t valueBegin = prefixAt + prefix.size();
    const std::size_t valueEnd = xml.find('"', valueBegin);
    if (valueEnd == std::string::npos) {
        return "";
    }
    return xml.substr(valueBegin, valueEnd - valueBegin);
}
344
// Collects the payload of every logged request.
//
// A request is any occurrence of the marker "PPAPIService: Request: "; its
// payload is the text from the end of the marker up to (not including) the
// next '\r' or '\n', or the end of the input. Payloads are returned in the
// order they appear and may be empty.
//
// This is a plain linear scan rather than std::regex: the payloads are whole
// SOAP documents on one line, and regex matching of such long lines is slow
// and can exhaust the stack in some standard-library implementations.
std::vector<std::string> extractRequests(const std::string& logContent) {
    const std::string marker = "PPAPIService: Request: ";

    std::vector<std::string> requests;
    std::size_t cursor = 0;
    while ((cursor = logContent.find(marker, cursor)) != std::string::npos) {
        const std::size_t payloadBegin = cursor + marker.size();
        std::size_t payloadEnd = logContent.find_first_of("\r\n", payloadBegin);
        if (payloadEnd == std::string::npos) {
            payloadEnd = logContent.size();
        }
        requests.emplace_back(logContent, payloadBegin, payloadEnd - payloadBegin);
        // Resume at the line terminator so text already consumed is not rescanned.
        cursor = payloadEnd;
    }
    return requests;
}
360
// Collects the payload of every logged response, with the leading XML
// prolog removed.
//
// A response is an occurrence of "PPAPIService: Response: <?" that is
// followed, on the same line, by at least one "?>". The payload is the text
// after the LAST "?>" on that line, up to (not including) the next '\r' or
// '\n', or the end of the input. Marker occurrences with no "?>" on their
// line are skipped. Payloads are returned in order and may be empty.
//
// This is a plain linear scan rather than std::regex: the payloads are whole
// SOAP documents on one line, and regex matching of such long lines is slow
// and can exhaust the stack in some standard-library implementations.
std::vector<std::string> extractResponses(const std::string& logContent) {
    const std::string marker = "PPAPIService: Response: <?";

    std::vector<std::string> responses;
    std::size_t cursor = 0;
    while ((cursor = logContent.find(marker, cursor)) != std::string::npos) {
        const std::size_t prologBegin = cursor + marker.size();
        std::size_t lineEnd = logContent.find_first_of("\r\n", prologBegin);
        if (lineEnd == std::string::npos) {
            lineEnd = logContent.size();
        }

        // Walk backwards from the end of the line to the last "?>" that lies
        // entirely after the marker. `end` is one past the '>' being tested.
        std::size_t payloadBegin = std::string::npos;
        for (std::size_t end = lineEnd; end >= prologBegin + 2; --end) {
            if (logContent[end - 2] == '?' && logContent[end - 1] == '>') {
                payloadBegin = end;
                break;
            }
        }

        if (payloadBegin == std::string::npos) {
            // No prolog terminator on this line; try the next marker.
            ++cursor;
            continue;
        }

        responses.emplace_back(logContent, payloadBegin, lineEnd - payloadBegin);
        cursor = lineEnd;
    }
    return responses;
}
376
377std::vector<Response> parseResponses(const std::vector<std::string>& responseXmls) {
378 std::vector<Response> responses;
379
380 for (const auto& xml : responseXmls) {
381 Response response;
382 response.transId = extractXmlValue(xml, "TransactionID");
383 response.status = extractXmlValue(xml, "Ack");
384 response.corrId = extractXmlValue(xml, "CorrelationID");
385 response.procAmount = extractXmlValue(xml, "Amount");
386
387 responses.push_back(response);
388 }
389
390 return responses;
391}
392
393std::vector<Transaction> parseTransactions(const std::vector<std::string>& requestXmls, const std::vector<Response>& responses) {
394 std::vector<Transaction> transactions;
395 int transNum = 1;
396
397 for (size_t i = 0; i < requestXmls.size(); ++i) {
398 const auto& xml = requestXmls[i];
399
400 Transaction transaction;
401 transaction.transNum = transNum++;
402
403 // Extract request fields
404 transaction.amount = extractXmlValue(xml, "ebl:OrderTotal");
405 transaction.currency = extractXmlAttribute(xml, "currencyID");
406 transaction.firstName = extractXmlValue(xml, "ebl:FirstName");
407 transaction.lastName = extractXmlValue(xml, "ebl:LastName");
408 transaction.street = extractXmlValue(xml, "ebl:Street1");
409 transaction.city = extractXmlValue(xml, "ebl:CityName");
410 transaction.state = extractXmlValue(xml, "ebl:StateOrProvince");
411 transaction.zip = extractXmlValue(xml, "ebl:PostalCode");
412 transaction.ccType = extractXmlValue(xml, "ebl:CreditCardType");
413 transaction.ccLast4 = extractXmlValue(xml, "ebl:CreditCardLastFourDigits");
414 transaction.expMonth = extractXmlValue(xml, "ebl:ExpMonth");
415 transaction.expYear = extractXmlValue(xml, "ebl:ExpYear");
416 transaction.cvv = extractXmlValue(xml, "ebl:CVV2");
417
418 // Get corresponding response data
419 if (i < responses.size()) {
420 transaction.transId = responses[i].transId;
421 transaction.status = responses[i].status;
422 transaction.corrId = responses[i].corrId;
423 transaction.procAmount = responses[i].procAmount;
424 }
425
426 transactions.push_back(transaction);
427 }
428
429 return transactions;
430}
431
432void outputRawData(const std::vector<Transaction>& transactions) {
433 for (const auto& t : transactions) {
434 std::cout << t.transNum << "|"
435 << t.amount << "|"
436 << t.currency << "|"
437 << t.firstName << "|"
438 << t.lastName << "|"
439 << t.street << "|"
440 << t.city << "|"
441 << t.state << "|"
442 << t.zip << "|"
443 << t.ccType << "|"
444 << t.ccLast4 << "|"
445 << t.expMonth << "|"
446 << t.expYear << "|"
447 << t.cvv << "|"
448 << t.transId << "|"
449 << t.status << "|"
450 << t.corrId << "|"
451 << t.procAmount << std::endl;
452 }
453}
454
455void outputSummary(const std::vector<Transaction>& transactions) {
456 std::cout << "=== SUMMARY ===" << std::endl;
457
458 // Count transactions
459 int total = transactions.size();
460 int successful = std::count_if(transactions.begin(), transactions.end(),
461 [](const Transaction& t) { return t.status == "Success"; });
462
463 std::cout << "Total Transactions: " << total << std::endl;
464 std::cout << "Successful: " << successful << std::endl;
465 std::cout << "Failed: " << (total - successful) << std::endl;
466 std::cout << std::endl;
467
468 // Top 5 states
469 std::map<std::string, int> stateCounts;
470 for (const auto& t : transactions) {
471 stateCounts[t.state]++;
472 }
473
474 std::cout << "Top 5 States by Transaction Count:" << std::endl;
475 std::vector<std::pair<std::string, int>> stateCountVec(stateCounts.begin(), stateCounts.end());
476 std::sort(stateCountVec.begin(), stateCountVec.end(),
477 [](const auto& a, const auto& b) { return a.second > b.second; });
478
479 int count = 0;
480 for (const auto& sc : stateCountVec) {
481 if (count++ >= 5) break;
482 std::cout << " " << sc.first << ": " << sc.second << std::endl;
483 }
484 std::cout << std::endl;
485
486 // Transaction amount stats
487 std::vector<double> amounts;
488 for (const auto& t : transactions) {
489 try {
490 amounts.push_back(std::stod(t.amount));
491 } catch (...) {
492 // Skip invalid amounts
493 }
494 }
495
496 if (!amounts.empty()) {
497 double totalAmount = std::accumulate(amounts.begin(), amounts.end(), 0.0);
498 double largest = *std::max_element(amounts.begin(), amounts.end());
499 double smallest = *std::min_element(amounts.begin(), amounts.end());
500
501 std::cout << "Transaction Amount Stats:" << std::endl;
502 std::cout << " Total: $" << std::fixed << std::setprecision(2) << totalAmount << std::endl;
503 std::cout << " Largest: $" << std::fixed << std::setprecision(2) << largest << std::endl;
504 std::cout << " Smallest: $" << std::fixed << std::setprecision(2) << smallest << std::endl;
505 }
506}