Parses PayPal SOAP logs.
1#include <cstdio>
2#include <iostream>
3#include <fstream>
4#include <string>
5#include <vector>
6#include <map>
7#include <regex>
8#include <algorithm>
9#include <numeric>
10#include <iomanip>
11#include <getopt.h>
12#include <cstring>
13
/**
 * One transaction record: the fields read from a logged SOAP request plus
 * the fields copied from the response paired with it.
 *
 * All textual fields default to the empty string; a field stays empty when
 * the corresponding element is not found. outputRawData() prints the members
 * in declaration order, separated by '|'.
 */
struct Transaction {
    /// Sequence number assigned by parseTransactions(), starting at 1.
    /// Defaults to 0 so a default-constructed record never holds an
    /// indeterminate value.
    int transNum{0};

    // --- Fields read from the request XML ---
    std::string amount;     ///< Text of <ebl:OrderTotal>.
    std::string currency;   ///< Value of the first currencyID="..." attribute.
    std::string firstName;  ///< Text of <ebl:FirstName>.
    std::string lastName;   ///< Text of <ebl:LastName>.
    std::string street;     ///< Text of <ebl:Street1>.
    std::string city;       ///< Text of <ebl:CityName>.
    std::string state;      ///< Text of <ebl:StateOrProvince>.
    std::string zip;        ///< Text of <ebl:PostalCode>.
    std::string ccType;     ///< Text of <ebl:CreditCardType>.
    std::string ccLast4;    ///< Text of <ebl:CreditCardLastFourDigits>.
    std::string expMonth;   ///< Text of <ebl:ExpMonth>.
    std::string expYear;    ///< Text of <ebl:ExpYear>.
    /// Text of <ebl:CVV2>.
    /// NOTE(review): this value is printed verbatim by outputRawData();
    /// confirm that emitting it in clear text is acceptable.
    std::string cvv;

    // --- Fields copied from the paired Response (empty when there is none) ---
    std::string transId;     ///< Response::transId.
    std::string status;      ///< Response::status; outputSummary() counts "Success".
    std::string corrId;      ///< Response::corrId.
    std::string procAmount;  ///< Response::procAmount.
};
35
/**
 * Fields pulled out of one logged SOAP response by parseResponses().
 * Every member is the text content of the named element, or an empty
 * string when that element was not found.
 */
struct Response {
    std::string transId;     ///< Text of <TransactionID>.
    std::string status;      ///< Text of <Ack>.
    std::string corrId;      ///< Text of <CorrelationID>.
    std::string procAmount;  ///< Text of <Amount>.
};
43
44// Function prototypes
45void showHelp(const char* programName);
46void generateBashCompletion();
47void generateZshCompletion();
48void generateFishCompletion();
49void generateManPage();
50std::string extractXmlValue(const std::string& xml, const std::string& tag);
51std::string extractXmlAttribute(const std::string& xml, const std::string& attribute);
52std::vector<std::string> extractRequests(const std::string& logContent);
53std::vector<std::string> extractResponses(const std::string& logContent);
54std::vector<Response> parseResponses(const std::vector<std::string>& responseXmls);
55std::vector<Transaction> parseTransactions(const std::vector<std::string>& requestXmls, const std::vector<Response>& responses);
56void outputRawData(const std::vector<Transaction>& transactions);
57void outputSummary(const std::vector<Transaction>& transactions);
58
59int main(int argc, char* argv[]) {
60 // Default options
61 bool summaryOnly = false;
62 std::string logFile;
63
64 // Parse command line options
65 static struct option longOptions[] = {
66 {"help", no_argument, 0, 'h'},
67 {"summary", no_argument, 0, 's'},
68 {"raw", no_argument, 0, 'r'},
69 {"generate-bash-completion", no_argument, 0, 0},
70 {"generate-zsh-completion", no_argument, 0, 0},
71 {"generate-fish-completion", no_argument, 0, 0},
72 {"man", no_argument, 0, 0},
73 {0, 0, 0, 0}
74 };
75
76 int optionIndex = 0;
77 int opt;
78 while ((opt = getopt_long(argc, argv, "hsr", longOptions, &optionIndex)) != -1) {
79 switch (opt) {
80 case 0:
81 // Long options without short equivalents
82 if (strcmp(longOptions[optionIndex].name, "generate-bash-completion") == 0) {
83 generateBashCompletion();
84 return 0;
85 } else if (strcmp(longOptions[optionIndex].name, "generate-zsh-completion") == 0) {
86 generateZshCompletion();
87 return 0;
88 } else if (strcmp(longOptions[optionIndex].name, "generate-fish-completion") == 0) {
89 generateFishCompletion();
90 return 0;
91 } else if (strcmp(longOptions[optionIndex].name, "man") == 0) {
92 generateManPage();
93 return 0;
94 }
95 break;
96 case 'h':
97 showHelp(argv[0]);
98 return 0;
99 case 's':
100 summaryOnly = true;
101 break;
102 case 'r':
103 summaryOnly = false;
104 break;
105 case '?':
106 std::cerr << "Unknown option: " << static_cast<char>(optopt) << std::endl;
107 showHelp(argv[0]);
108 return 1;
109 default:
110 break;
111 }
112 }
113
114 // Get logfile name
115 if (optind < argc) {
116 logFile = argv[optind];
117 } else {
118 std::cerr << "Error: No logfile specified" << std::endl;
119 showHelp(argv[0]);
120 return 1;
121 }
122
123 // Check if file exists
124 std::ifstream file(logFile);
125 if (!file.is_open()) {
126 std::cerr << "Error: File '" << logFile << "' not found" << std::endl;
127 return 1;
128 }
129
130 // Read the entire file
131 std::string logContent((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
132 file.close();
133
134 // Extract requests and responses
135 std::vector<std::string> requestXmls = extractRequests(logContent);
136 std::vector<std::string> responseXmls = extractResponses(logContent);
137
138 // Parse responses
139 std::vector<Response> responses = parseResponses(responseXmls);
140
141 // Parse transactions
142 std::vector<Transaction> transactions = parseTransactions(requestXmls, responses);
143
144 // Output data
145 if (summaryOnly) {
146 outputSummary(transactions);
147 } else {
148 outputRawData(transactions);
149 }
150
151 return 0;
152}
153
/**
 * Prints the usage/help text to standard output.
 *
 * @param programName Name the program was invoked as (normally argv[0]).
 *                    It is echoed unchanged in the USAGE line. A null or
 *                    empty value falls back to "soapdump".
 *
 * The closing "man ..." hint uses only the final path component of
 * programName: when the program is started through a path such as
 * "./soapdump", passing that path to man(1) would not look up the manual
 * page by name.
 */
void showHelp(const char* programName) {
    // argv[0] may be null or empty; streaming a null char* is undefined.
    const std::string invokedAs =
        (programName != nullptr && *programName != '\0') ? programName : "soapdump";

    // Strip any directory part for the man-page topic.
    const std::string::size_type lastSlash = invokedAs.find_last_of('/');
    std::string manTopic =
        (lastSlash == std::string::npos) ? invokedAs : invokedAs.substr(lastSlash + 1);
    if (manTopic.empty()) {
        manTopic = invokedAs;  // e.g. a name that ends in '/'
    }

    std::cout << "PayPal SOAP Log Parser\n\n";
    std::cout << "USAGE:\n";
    std::cout << " " << invokedAs << " [OPTIONS] <logfile>\n\n";
    std::cout << "OPTIONS:\n";
    std::cout << " -h, --help Show this help message\n";
    std::cout << " -s, --summary Show summary statistics only\n";
    std::cout << " -r, --raw Output raw structured data (default)\n";
    std::cout << " --generate-bash-completion Generate Bash completion script\n";
    std::cout << " --generate-zsh-completion Generate Zsh completion script\n";
    std::cout << " --generate-fish-completion Generate Fish completion script\n";
    std::cout << " --man Generate man page\n\n";
    std::cout << "For detailed information, field descriptions, and examples, run:\n";
    std::cout << " man " << manTopic << " \n";
}
169
/**
 * Writes the Bash completion script for the `soapdump` command to standard
 * output, followed by a newline, and flushes the stream.
 */
void generateBashCompletion() {
    // Script text, emitted verbatim (it begins with an empty line).
    static const char* const kBashScript = R"BASH(
_soapdump_completions()
{
 local cur prev opts
 COMPREPLY=()
 cur="${COMP_WORDS[COMP_CWORD]}"
 prev="${COMP_WORDS[COMP_CWORD-1]}"
 opts="--help --summary --raw --generate-bash-completion --generate-zsh-completion --generate-fish-completion --man"

 if [[ ${cur} == -* ]] ; then
 COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
 return 0
 fi

 # Complete with log files if not an option
 if [[ ${cur} != -* ]]; then
 COMPREPLY=( $(compgen -f -X '!*.log' -- ${cur}) )
 return 0
 fi
}

complete -F _soapdump_completions soapdump
)BASH";

    std::cout << kBashScript << '\n' << std::flush;
}
195
/**
 * Writes the Zsh completion script for the `soapdump` command to standard
 * output, followed by a newline, and flushes the stream.
 *
 * The script starts directly with the "#compdef soapdump" tag. The tag has
 * to be the first line of the file for zsh's compinit to register it, so
 * the text must not begin with an empty line.
 */
void generateZshCompletion() {
    // No line break after the opening delimiter: "#compdef" must be line 1.
    std::cout << R"(#compdef soapdump

_arguments -s -S \
 '(-h --help)'{-h,--help}'[Show help message]' \
 '(-s --summary)'{-s,--summary}'[Show summary statistics only]' \
 '(-r --raw)'{-r,--raw}'[Output raw structured data (default)]' \
 '--generate-bash-completion[Generate Bash completion script]' \
 '--generate-zsh-completion[Generate Zsh completion script]' \
 '--generate-fish-completion[Generate Fish completion script]' \
 '--man[Generate man page]' \
 '*:log file:_files -g "*.log"'
)" << std::endl;
}
211
/**
 * Writes the Fish completion script for the `soapdump` command to standard
 * output, followed by a newline, and flushes the stream.
 */
void generateFishCompletion() {
    // Script text, emitted verbatim (it begins with an empty line).
    static const char* const kFishScript = R"FISH(
function __fish_soapdump_no_subcommand
 set cmd (commandline -opc)
 if [ (count $cmd) -eq 1 ]
 return 0
 end
 return 1
end

complete -c soapdump -s h -l help -d "Show help message"
complete -c soapdump -s s -l summary -d "Show summary statistics only"
complete -c soapdump -s r -l raw -d "Output raw structured data"
complete -c soapdump -l generate-bash-completion -d "Generate Bash completion script"
complete -c soapdump -l generate-zsh-completion -d "Generate Zsh completion script"
complete -c soapdump -l generate-fish-completion -d "Generate Fish completion script"
complete -c soapdump -l man -d "Generate man page"
complete -c soapdump -n "__fish_soapdump_no_subcommand" -a "*.log" -d "Log file"
)FISH";

    std::cout << kFishScript << '\n' << std::flush;
}
232
/**
 * Writes the soapdump(1) manual page, in troff man-macro source form, to
 * standard output, followed by a newline, and flushes the stream.
 */
void generateManPage() {
    // Page source, emitted verbatim; it starts directly with the .TH line.
    static const char* const kManSource = R"MAN(.TH SOAPDUMP 1 "December 2024" "soapdump 1.0" "User Commands"
.SH NAME
soapdump \- parse PayPal SOAP transaction logs
.SH SYNOPSIS
.B soapdump
[\fIOPTIONS\fR] \fIlogfile\fR
.SH DESCRIPTION
.B soapdump
parses PayPal SOAP log files to extract transaction data. It reads log entries containing XML request/response pairs and outputs structured transaction information in pipe-delimited format suitable for further processing with standard Unix tools.

The program matches SOAP requests with their corresponding responses to provide complete transaction records including customer information, payment details, and processing results.
.SH OPTIONS
.TP
.BR \-h ", " \-\-help
Display help information and exit
.TP
.BR \-s ", " \-\-summary
Display summary statistics instead of raw transaction data
.TP
.BR \-r ", " \-\-raw
Output raw transaction data in pipe-delimited format (default behavior)
.TP
.BR \-\-generate-bash-completion
Output bash shell completion script
.TP
.BR \-\-generate-zsh-completion
Output zsh shell completion script
.TP
.BR \-\-generate-fish-completion
Output fish shell completion script
.TP
.BR \-\-man
Output this manual page in troff format
.SH OUTPUT FORMAT
By default, transactions are output one per line with pipe-separated fields:

TRANS_NUM|AMOUNT|CURRENCY|FIRSTNAME|LASTNAME|STREET|CITY|STATE|ZIP|CCTYPE|CCLAST4|EXPMONTH|EXPYEAR|CVV|TRANSID|STATUS|CORRID|PROC_AMOUNT

Fields may be empty if not present in the source data.
.SH EXAMPLES
Parse a log file and display all transactions:
.RS
.B soapdump paypal.log
.RE

Show only successful transactions:
.RS
.B soapdump paypal.log | grep '|Success|'
.RE

Count transactions by state:
.RS
.B soapdump paypal.log | cut -d'|' -f8 | sort | uniq -c | sort -rn
.RE

Find the largest transaction amount:
.RS
.B soapdump paypal.log | sort -t'|' -k2 -rn | head -1
.RE

Show transactions over $500:
.RS
.B soapdump paypal.log | awk -F'|' '$2 > 500'
.RE

Display summary statistics:
.RS
.B soapdump --summary paypal.log
.RE
.SH FILES
The input file should contain PayPal SOAP API log entries with request and response XML data.
.SH AUTHOR
Written by Kieran Klukas.
.SH REPORTING BUGS
Report bugs to <me@dunkirk.sh>
.SH COPYRIGHT
Copyright \(co 2024 Kieran Klukas.
License MIT: <https://opensource.org/licenses/MIT>
.br
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
)MAN";

    std::cout << kManSource << '\n' << std::flush;
}
317
/**
 * Returns the text content of the first `<tag ...>text</tag>` element in
 * @p xml whose content contains no nested markup.
 *
 * The tag name is matched literally (no character in it is treated as a
 * pattern metacharacter) and must be followed by '>', '/' or whitespace, so
 * a lookup for "Amount" does not start matching at "<AmountDue>".
 * Attributes on the opening tag are skipped. No entity decoding is done.
 *
 * @param xml XML fragment to search.
 * @param tag Element name, including any namespace prefix (e.g. "ebl:City").
 * @return The element text, or an empty string when @p tag is empty or no
 *         such element is found.
 */
std::string extractXmlValue(const std::string& xml, const std::string& tag) {
    if (tag.empty()) {
        return "";
    }

    const std::string openPrefix = "<" + tag;
    const std::string closeTag = "</" + tag + ">";

    for (std::string::size_type open = xml.find(openPrefix);
         open != std::string::npos;
         open = xml.find(openPrefix, open + 1)) {
        const std::string::size_type afterName = open + openPrefix.size();
        if (afterName >= xml.size()) {
            break;  // name runs to the end of the input: no room for '>'
        }

        // The element name has to end here; otherwise this is a longer name
        // that merely starts with `tag`.
        const char next = xml[afterName];
        const bool nameEnds = next == '>' || next == '/' || next == ' ' ||
                              next == '\t' || next == '\n' || next == '\r';
        if (!nameEnds) {
            continue;
        }

        const std::string::size_type openEnd = xml.find('>', afterName);
        if (openEnd == std::string::npos) {
            break;  // no opening tag can be completed after this point
        }

        // The content runs up to the next '<', which must start the closing
        // tag; anything else means nested markup, so try the next candidate.
        const std::string::size_type textBegin = openEnd + 1;
        const std::string::size_type textEnd = xml.find('<', textBegin);
        if (textEnd == std::string::npos) {
            break;
        }
        if (xml.compare(textEnd, closeTag.size(), closeTag) == 0) {
            return xml.substr(textBegin, textEnd - textBegin);
        }
    }
    return "";
}
326
/**
 * Returns the value of the first `attribute="value"` occurrence in @p xml.
 *
 * The attribute name is matched literally and must not be the tail of a
 * longer name (a lookup for "id" skips `xid="..."`); a namespace prefix such
 * as `ns:` in front of it is still accepted. Both double- and single-quoted
 * values are recognised, and whitespace around '=' is allowed. The search is
 * not tied to any particular element. No entity decoding is done.
 *
 * @param xml       XML fragment to search.
 * @param attribute Attribute name to look for.
 * @return The attribute value, or an empty string when @p attribute is
 *         empty or no such attribute is found.
 */
std::string extractXmlAttribute(const std::string& xml, const std::string& attribute) {
    if (attribute.empty()) {
        return "";
    }

    const auto isNameChar = [](char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
               (c >= '0' && c <= '9') || c == '_' || c == '-' || c == '.';
    };
    const auto isSpace = [](char c) {
        return c == ' ' || c == '\t' || c == '\n' || c == '\r';
    };

    for (std::string::size_type at = xml.find(attribute);
         at != std::string::npos;
         at = xml.find(attribute, at + 1)) {
        // Reject hits that are only the suffix of a longer attribute name.
        if (at > 0 && isNameChar(xml[at - 1])) {
            continue;
        }

        std::string::size_type cursor = at + attribute.size();
        while (cursor < xml.size() && isSpace(xml[cursor])) {
            ++cursor;
        }
        if (cursor >= xml.size() || xml[cursor] != '=') {
            continue;
        }
        ++cursor;
        while (cursor < xml.size() && isSpace(xml[cursor])) {
            ++cursor;
        }
        if (cursor >= xml.size()) {
            continue;
        }

        const char quote = xml[cursor];
        if (quote != '"' && quote != '\'') {
            continue;
        }
        const std::string::size_type valueBegin = cursor + 1;
        const std::string::size_type valueEnd = xml.find(quote, valueBegin);
        if (valueEnd == std::string::npos) {
            continue;  // unterminated value
        }
        return xml.substr(valueBegin, valueEnd - valueBegin);
    }
    return "";
}
335
/**
 * Collects the payload of every logged request.
 *
 * For each occurrence of the marker "PPAPIService: Request: " the text from
 * just after the marker up to (not including) the end of that line is
 * stored; the scan then resumes at the end of the line. Either '\r' or '\n'
 * ends a line, so CRLF logs do not leave a trailing '\r' in the payload.
 *
 * Plain substring search is used instead of std::regex so that very long
 * single-line payloads are handled in linear time without deep recursion.
 *
 * @param logContent Complete log text.
 * @return Payloads in the order they appear in the log; a marker with
 *         nothing after it yields an empty string.
 */
std::vector<std::string> extractRequests(const std::string& logContent) {
    const std::string marker = "PPAPIService: Request: ";
    std::vector<std::string> requests;

    std::string::size_type cursor = 0;
    while ((cursor = logContent.find(marker, cursor)) != std::string::npos) {
        const std::string::size_type payloadBegin = cursor + marker.size();
        std::string::size_type payloadEnd = logContent.find_first_of("\r\n", payloadBegin);
        if (payloadEnd == std::string::npos) {
            payloadEnd = logContent.size();  // last line without a terminator
        }
        requests.push_back(logContent.substr(payloadBegin, payloadEnd - payloadBegin));
        cursor = payloadEnd;
    }

    return requests;
}
351
/**
 * Collects the payload of every logged response that starts with an XML
 * declaration / processing instruction.
 *
 * A response line is recognised by the marker "PPAPIService: Response: "
 * immediately followed by "<?". The stored payload is everything after the
 * LAST "?>" on that line, up to (not including) the end of the line; '\r'
 * or '\n' ends a line. Marker occurrences without a closing "?>" on the
 * same line are skipped.
 *
 * Plain substring search is used instead of std::regex so that very long
 * single-line payloads are handled in linear time without deep recursion.
 *
 * @param logContent Complete log text.
 * @return Payloads in the order they appear in the log.
 */
std::vector<std::string> extractResponses(const std::string& logContent) {
    const std::string marker = "PPAPIService: Response: <?";
    const std::string declEnd = "?>";
    std::vector<std::string> responses;

    std::string::size_type cursor = 0;
    while ((cursor = logContent.find(marker, cursor)) != std::string::npos) {
        const std::string::size_type declBodyBegin = cursor + marker.size();
        std::string::size_type lineEnd = logContent.find_first_of("\r\n", declBodyBegin);
        if (lineEnd == std::string::npos) {
            lineEnd = logContent.size();
        }

        // Look for the last "?>" that lies completely inside
        // [declBodyBegin, lineEnd); it must not reuse the '?' of "<?".
        std::string::size_type lastClose = std::string::npos;
        if (lineEnd - declBodyBegin >= declEnd.size()) {
            lastClose = logContent.rfind(declEnd, lineEnd - declEnd.size());
            if (lastClose != std::string::npos && lastClose < declBodyBegin) {
                lastClose = std::string::npos;
            }
        }

        if (lastClose == std::string::npos) {
            cursor = declBodyBegin;  // not a usable response; keep scanning
            continue;
        }

        const std::string::size_type payloadBegin = lastClose + declEnd.size();
        responses.push_back(logContent.substr(payloadBegin, lineEnd - payloadBegin));
        cursor = lineEnd;
    }

    return responses;
}
367
368std::vector<Response> parseResponses(const std::vector<std::string>& responseXmls) {
369 std::vector<Response> responses;
370
371 for (const auto& xml : responseXmls) {
372 Response response;
373 response.transId = extractXmlValue(xml, "TransactionID");
374 response.status = extractXmlValue(xml, "Ack");
375 response.corrId = extractXmlValue(xml, "CorrelationID");
376 response.procAmount = extractXmlValue(xml, "Amount");
377
378 responses.push_back(response);
379 }
380
381 return responses;
382}
383
384std::vector<Transaction> parseTransactions(const std::vector<std::string>& requestXmls, const std::vector<Response>& responses) {
385 std::vector<Transaction> transactions;
386 int transNum = 1;
387
388 for (size_t i = 0; i < requestXmls.size(); ++i) {
389 const auto& xml = requestXmls[i];
390
391 Transaction transaction;
392 transaction.transNum = transNum++;
393
394 // Extract request fields
395 transaction.amount = extractXmlValue(xml, "ebl:OrderTotal");
396 transaction.currency = extractXmlAttribute(xml, "currencyID");
397 transaction.firstName = extractXmlValue(xml, "ebl:FirstName");
398 transaction.lastName = extractXmlValue(xml, "ebl:LastName");
399 transaction.street = extractXmlValue(xml, "ebl:Street1");
400 transaction.city = extractXmlValue(xml, "ebl:CityName");
401 transaction.state = extractXmlValue(xml, "ebl:StateOrProvince");
402 transaction.zip = extractXmlValue(xml, "ebl:PostalCode");
403 transaction.ccType = extractXmlValue(xml, "ebl:CreditCardType");
404 transaction.ccLast4 = extractXmlValue(xml, "ebl:CreditCardLastFourDigits");
405 transaction.expMonth = extractXmlValue(xml, "ebl:ExpMonth");
406 transaction.expYear = extractXmlValue(xml, "ebl:ExpYear");
407 transaction.cvv = extractXmlValue(xml, "ebl:CVV2");
408
409 // Get corresponding response data
410 if (i < responses.size()) {
411 transaction.transId = responses[i].transId;
412 transaction.status = responses[i].status;
413 transaction.corrId = responses[i].corrId;
414 transaction.procAmount = responses[i].procAmount;
415 }
416
417 transactions.push_back(transaction);
418 }
419
420 return transactions;
421}
422
423void outputRawData(const std::vector<Transaction>& transactions) {
424 for (const auto& t : transactions) {
425 std::cout << t.transNum << "|"
426 << t.amount << "|"
427 << t.currency << "|"
428 << t.firstName << "|"
429 << t.lastName << "|"
430 << t.street << "|"
431 << t.city << "|"
432 << t.state << "|"
433 << t.zip << "|"
434 << t.ccType << "|"
435 << t.ccLast4 << "|"
436 << t.expMonth << "|"
437 << t.expYear << "|"
438 << t.cvv << "|"
439 << t.transId << "|"
440 << t.status << "|"
441 << t.corrId << "|"
442 << t.procAmount << std::endl;
443 }
444}
445
446void outputSummary(const std::vector<Transaction>& transactions) {
447 std::cout << "=== SUMMARY ===" << std::endl;
448
449 // Count transactions
450 int total = transactions.size();
451 int successful = std::count_if(transactions.begin(), transactions.end(),
452 [](const Transaction& t) { return t.status == "Success"; });
453
454 std::cout << "Total Transactions: " << total << std::endl;
455 std::cout << "Successful: " << successful << std::endl;
456 std::cout << "Failed: " << (total - successful) << std::endl;
457 std::cout << std::endl;
458
459 // Top 5 states
460 std::map<std::string, int> stateCounts;
461 for (const auto& t : transactions) {
462 stateCounts[t.state]++;
463 }
464
465 std::cout << "Top 5 States by Transaction Count:" << std::endl;
466 std::vector<std::pair<std::string, int>> stateCountVec(stateCounts.begin(), stateCounts.end());
467 std::sort(stateCountVec.begin(), stateCountVec.end(),
468 [](const auto& a, const auto& b) { return a.second > b.second; });
469
470 int count = 0;
471 for (const auto& sc : stateCountVec) {
472 if (count++ >= 5) break;
473 std::cout << " " << sc.first << ": " << sc.second << std::endl;
474 }
475 std::cout << std::endl;
476
477 // Transaction amount stats
478 std::vector<double> amounts;
479 for (const auto& t : transactions) {
480 try {
481 amounts.push_back(std::stod(t.amount));
482 } catch (...) {
483 // Skip invalid amounts
484 }
485 }
486
487 if (!amounts.empty()) {
488 double totalAmount = std::accumulate(amounts.begin(), amounts.end(), 0.0);
489 double largest = *std::max_element(amounts.begin(), amounts.end());
490 double smallest = *std::min_element(amounts.begin(), amounts.end());
491
492 std::cout << "Transaction Amount Stats:" << std::endl;
493 std::cout << " Total: $" << std::fixed << std::setprecision(2) << totalAmount << std::endl;
494 std::cout << " Largest: $" << std::fixed << std::setprecision(2) << largest << std::endl;
495 std::cout << " Smallest: $" << std::fixed << std::setprecision(2) << smallest << std::endl;
496 }
497}